aboutsummaryrefslogtreecommitdiff
path: root/llvm/test
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll40
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir21
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir5
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir6
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll13
-rw-r--r--llvm/test/CodeGen/AArch64/f16-instructions.ll21
-rw-r--r--llvm/test/CodeGen/AArch64/fcvt-fixed.ll112
-rw-r--r--llvm/test/CodeGen/AArch64/fdiv-combine.ll152
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll20
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-fadd.ll42
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll12
-rw-r--r--llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll (renamed from llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll)374
-rw-r--r--llvm/test/CodeGen/AMDGPU/fsub.ll38
-rw-r--r--llvm/test/CodeGen/AMDGPU/mad_int24.ll313
-rw-r--r--llvm/test/CodeGen/AMDGPU/mad_uint24.ll1492
-rw-r--r--llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll4064
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir8
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir10
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir12
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir16
-rw-r--r--llvm/test/CodeGen/SPIRV/llc-pipeline.ll214
-rw-r--r--llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll48
-rw-r--r--llvm/test/CodeGen/WebAssembly/saturating-truncation.ll87
-rw-r--r--llvm/test/CodeGen/X86/ptrtoaddr-fast-isel.ll11
-rw-r--r--llvm/test/DebugInfo/X86/instr-ref-opt-bisect2.ll36
-rw-r--r--llvm/test/MC/AArch64/armv9a-sysp-diagnostics.s95
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s51
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s26
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_valu_lit64.s140
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s2
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s15
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s34
-rw-r--r--llvm/test/MC/AMDGPU/gfx12_asm_sop1.s67
-rw-r--r--llvm/test/MC/AMDGPU/gfx12_asm_sop2.s67
-rw-r--r--llvm/test/MC/AMDGPU/gfx12_asm_sopc.s5
-rw-r--r--llvm/test/MC/AMDGPU/literals.s1826
-rw-r--r--llvm/test/MC/AMDGPU/vop3-literal.s3
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_salu_lit64.txt36
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt2
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_valu_lit64.txt138
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt2
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt26
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt48
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt58
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopc.txt4
-rw-r--r--llvm/test/Other/new-pm-print-pipeline.ll2
-rw-r--r--llvm/test/Transforms/GVN/ptrtoaddr.ll30
-rw-r--r--llvm/test/Transforms/InstCombine/fold-selective-shift.ll323
-rw-r--r--llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll164
-rw-r--r--llvm/test/Transforms/LoopRotate/multiple-exits.ll236
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll9
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll48
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll195
-rw-r--r--llvm/test/Transforms/LoopVectorize/scalable-predication.ll1
-rw-r--r--llvm/test/Transforms/NewGVN/ptrtoaddr.ll29
-rw-r--r--llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll448
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll30
-rw-r--r--llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll130
-rw-r--r--llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s10
59 files changed, 6931 insertions, 4536 deletions
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll
index b1fe7b1..7ba422d 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll
@@ -615,22 +615,14 @@ define void @test_ptrs_aligned_by_4_via_assumption(ptr %start, ptr %end) {
; CHECK-LABEL: 'test_ptrs_aligned_by_4_via_assumption'
; CHECK-NEXT: Classifying expressions for: @test_ptrs_aligned_by_4_via_assumption
; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4
-; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_ptrs_aligned_by_4_via_assumption
-; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count.
-; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count.
-; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count.
-; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
-; CHECK-NEXT: Predicates:
-; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
-; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903
-; CHECK-NEXT: Predicates:
-; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
-; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
-; CHECK-NEXT: Predicates:
-; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4611686018427387903
+; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
+; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 4) ]
@@ -652,22 +644,14 @@ define void @test_ptrs_aligned_by_8_via_assumption(ptr %start, ptr %end) {
; CHECK-LABEL: 'test_ptrs_aligned_by_8_via_assumption'
; CHECK-NEXT: Classifying expressions for: @test_ptrs_aligned_by_8_via_assumption
; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4
-; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_ptrs_aligned_by_8_via_assumption
-; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count.
-; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count.
-; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count.
-; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
-; CHECK-NEXT: Predicates:
-; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
-; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903
-; CHECK-NEXT: Predicates:
-; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
-; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
-; CHECK-NEXT: Predicates:
-; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4611686018427387903
+; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
+; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
index 6362ed6..9381f0f4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
@@ -1,11 +1,12 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
-# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+# RUN: llc -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
...
---
name: fconstant_to_constant_s32
alignment: 4
tracksRegLiveness: true
+legalized: true
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
@@ -24,16 +25,17 @@ body: |
; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
- %3:_(s32) = G_FCONSTANT float 0x3FA99999A0000000
- %1:_(s64) = G_CONSTANT i64 524
- %2:_(p0) = G_PTR_ADD %0, %1(s64)
- G_STORE %3(s32), %2(p0) :: (store (s32))
+ %1:_(s32) = G_FCONSTANT float 0x3FA99999A0000000
+ %2:_(s64) = G_CONSTANT i64 524
+ %3:_(p0) = G_PTR_ADD %0, %2(s64)
+ G_STORE %1(s32), %3(p0) :: (store (s32))
RET_ReallyLR
...
---
name: fconstant_to_constant_s64
alignment: 4
tracksRegLiveness: true
+legalized: true
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
@@ -48,7 +50,7 @@ body: |
; CHECK-NEXT: G_STORE %c(s64), %ptr(p0) :: (store (s64))
; CHECK-NEXT: RET_ReallyLR
%ptr:_(p0) = COPY $x0
- %c:_(s64) = G_FCONSTANT double 0.0
+ %c:_(s64) = G_FCONSTANT double 0.000000e+00
G_STORE %c(s64), %ptr(p0) :: (store (s64))
RET_ReallyLR
...
@@ -56,6 +58,7 @@ body: |
name: no_store_means_no_combine
alignment: 4
tracksRegLiveness: true
+legalized: true
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
@@ -71,7 +74,7 @@ body: |
; CHECK-NEXT: %add:_(s64) = G_FADD %v, %c
; CHECK-NEXT: RET_ReallyLR implicit %add(s64)
%v:_(s64) = COPY $x0
- %c:_(s64) = G_FCONSTANT double 0.0
+ %c:_(s64) = G_FCONSTANT double 0.000000e+00
%add:_(s64) = G_FADD %v, %c
- RET_ReallyLR implicit %add
+ RET_ReallyLR implicit %add(s64)
...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
index c301e76..c00ce22 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
@@ -48,8 +48,9 @@ body: |
; CHECK-NEXT: $w0 = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
; CHECK-NEXT: $x0 = COPY [[C1]](s64)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: $w0 = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
%0:_(s32) = G_FCONSTANT float 1.0
$w0 = COPY %0
%1:_(s64) = G_FCONSTANT double 2.0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
index ddf219d..c6df345 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
@@ -8,7 +8,7 @@ tracksRegLiveness: true
body: |
bb.0:
; NO-FP16-LABEL: name: fp16
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 0
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH0000
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
;
@@ -26,7 +26,7 @@ tracksRegLiveness: true
body: |
bb.0:
; NO-FP16-LABEL: name: fp16_non_zero
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 16384
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH4000
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
;
@@ -44,7 +44,7 @@ tracksRegLiveness: true
body: |
bb.1.entry:
; NO-FP16-LABEL: name: nan
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 31745
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH7C01
; NO-FP16-NEXT: %ext:_(s32) = G_FPEXT %cst(s16)
; NO-FP16-NEXT: $w0 = COPY %ext(s32)
; NO-FP16-NEXT: RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index cb5df07..322a96a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -739,15 +739,14 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
;
; GISEL-LABEL: postidx32_shalf:
; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #0 ; =0x0
-; GISEL-NEXT: ldr h1, [x0], #4
-; GISEL-NEXT: fmov s2, w8
+; GISEL-NEXT: movi d1, #0000000000000000
+; GISEL-NEXT: ldr h2, [x0], #4
; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0
; GISEL-NEXT: fmov w9, s0
-; GISEL-NEXT: fcvt s3, h1
-; GISEL-NEXT: fmov w8, s1
-; GISEL-NEXT: fcvt s2, h2
-; GISEL-NEXT: fcmp s3, s2
+; GISEL-NEXT: fcvt s3, h2
+; GISEL-NEXT: fmov w8, s2
+; GISEL-NEXT: fcvt s1, h1
+; GISEL-NEXT: fcmp s3, s1
; GISEL-NEXT: csel w8, w8, w9, mi
; GISEL-NEXT: strh w8, [x1]
; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index adc536d..b234ef7 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -782,18 +782,19 @@ define void @test_fccmp(half %in, ptr %out) {
;
; CHECK-CVT-GI-LABEL: test_fccmp:
; CHECK-CVT-GI: // %bb.0:
-; CHECK-CVT-GI-NEXT: mov w8, #17664 // =0x4500
-; CHECK-CVT-GI-NEXT: mov w9, #18432 // =0x4800
+; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0
; CHECK-CVT-GI-NEXT: fcvt s2, h0
-; CHECK-CVT-GI-NEXT: fmov s1, w8
-; CHECK-CVT-GI-NEXT: fmov s3, w9
-; CHECK-CVT-GI-NEXT: fmov w9, s0
-; CHECK-CVT-GI-NEXT: fcvt s1, h1
-; CHECK-CVT-GI-NEXT: fcvt s3, h3
-; CHECK-CVT-GI-NEXT: fcmp s2, s1
-; CHECK-CVT-GI-NEXT: fccmp s2, s3, #4, mi
-; CHECK-CVT-GI-NEXT: csel w8, w9, w8, gt
+; CHECK-CVT-GI-NEXT: ldr h1, [x8, :lo12:.LCPI29_0]
+; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_1
+; CHECK-CVT-GI-NEXT: ldr h4, [x8, :lo12:.LCPI29_1]
+; CHECK-CVT-GI-NEXT: fmov w8, s0
+; CHECK-CVT-GI-NEXT: fcvt s3, h1
+; CHECK-CVT-GI-NEXT: fmov w9, s1
+; CHECK-CVT-GI-NEXT: fcvt s4, h4
+; CHECK-CVT-GI-NEXT: fcmp s2, s3
+; CHECK-CVT-GI-NEXT: fccmp s2, s4, #4, mi
+; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt
; CHECK-CVT-GI-NEXT: strh w8, [x0]
; CHECK-CVT-GI-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 51aad4fe..7409bfb 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -166,9 +166,9 @@ define i32 @fcvtzs_f16_i32_7(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI8_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI8_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -206,9 +206,9 @@ define i32 @fcvtzs_f16_i32_15(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI9_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI9_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -246,9 +246,9 @@ define i64 @fcvtzs_f16_i64_7(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI10_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI10_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -286,9 +286,9 @@ define i64 @fcvtzs_f16_i64_15(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI11_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI11_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -470,9 +470,9 @@ define i32 @fcvtzu_f16_i32_7(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI20_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI20_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -510,9 +510,9 @@ define i32 @fcvtzu_f16_i32_15(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI21_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI21_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -550,9 +550,9 @@ define i64 @fcvtzu_f16_i64_7(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI22_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI22_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -590,9 +590,9 @@ define i64 @fcvtzu_f16_i64_15(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI23_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI23_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -775,10 +775,10 @@ define half @scvtf_f16_i32_7(i32 %int) {
; CHECK-GI-NO16-LABEL: scvtf_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: scvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI32_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI32_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -815,10 +815,10 @@ define half @scvtf_f16_i32_15(i32 %int) {
; CHECK-GI-NO16-LABEL: scvtf_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: scvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI33_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI33_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -855,10 +855,10 @@ define half @scvtf_f16_i64_7(i64 %long) {
; CHECK-GI-NO16-LABEL: scvtf_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: scvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI34_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -895,10 +895,10 @@ define half @scvtf_f16_i64_15(i64 %long) {
; CHECK-GI-NO16-LABEL: scvtf_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: scvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI35_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI35_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1079,10 +1079,10 @@ define half @ucvtf_f16_i32_7(i32 %int) {
; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: ucvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI44_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI44_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1119,10 +1119,10 @@ define half @ucvtf_f16_i32_15(i32 %int) {
; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: ucvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI45_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI45_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1159,10 +1159,10 @@ define half @ucvtf_f16_i64_7(i64 %long) {
; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: ucvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI46_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI46_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1199,10 +1199,10 @@ define half @ucvtf_f16_i64_15(i64 %long) {
; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: ucvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI47_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI47_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1373,9 +1373,9 @@ define i32 @fcvtzs_sat_f16_i32_7(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI55_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI55_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1413,9 +1413,9 @@ define i32 @fcvtzs_sat_f16_i32_15(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI56_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI56_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1453,9 +1453,9 @@ define i64 @fcvtzs_sat_f16_i64_7(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI57_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI57_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1493,9 +1493,9 @@ define i64 @fcvtzs_sat_f16_i64_15(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI58_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI58_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1667,9 +1667,9 @@ define i32 @fcvtzu_sat_f16_i32_7(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI66_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI66_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1707,9 +1707,9 @@ define i32 @fcvtzu_sat_f16_i32_15(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI67_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI67_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1747,9 +1747,9 @@ define i64 @fcvtzu_sat_f16_i64_7(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI68_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI68_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1787,9 +1787,9 @@ define i64 @fcvtzu_sat_f16_i64_15(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI69_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI69_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
index 91bb8ac..9eacb61 100644
--- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
@@ -12,22 +12,14 @@
; =>
; recip = 1.0 / D; a * recip; b * recip; c * recip;
define void @three_fdiv_float(float %D, float %a, float %b, float %c) {
-; CHECK-SD-LABEL: three_fdiv_float:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov s4, #1.00000000
-; CHECK-SD-NEXT: fdiv s4, s4, s0
-; CHECK-SD-NEXT: fmul s0, s1, s4
-; CHECK-SD-NEXT: fmul s1, s2, s4
-; CHECK-SD-NEXT: fmul s2, s3, s4
-; CHECK-SD-NEXT: b foo_3f
-;
-; CHECK-GI-LABEL: three_fdiv_float:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fdiv s4, s1, s0
-; CHECK-GI-NEXT: fdiv s1, s2, s0
-; CHECK-GI-NEXT: fdiv s2, s3, s0
-; CHECK-GI-NEXT: fmov s0, s4
-; CHECK-GI-NEXT: b foo_3f
+; CHECK-LABEL: three_fdiv_float:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov s4, #1.00000000
+; CHECK-NEXT: fdiv s4, s4, s0
+; CHECK-NEXT: fmul s0, s1, s4
+; CHECK-NEXT: fmul s1, s2, s4
+; CHECK-NEXT: fmul s2, s3, s4
+; CHECK-NEXT: b foo_3f
%div = fdiv arcp float %a, %D
%div1 = fdiv arcp float %b, %D
%div2 = fdiv arcp float %c, %D
@@ -36,22 +28,14 @@ define void @three_fdiv_float(float %D, float %a, float %b, float %c) {
}
define void @three_fdiv_double(double %D, double %a, double %b, double %c) {
-; CHECK-SD-LABEL: three_fdiv_double:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov d4, #1.00000000
-; CHECK-SD-NEXT: fdiv d4, d4, d0
-; CHECK-SD-NEXT: fmul d0, d1, d4
-; CHECK-SD-NEXT: fmul d1, d2, d4
-; CHECK-SD-NEXT: fmul d2, d3, d4
-; CHECK-SD-NEXT: b foo_3d
-;
-; CHECK-GI-LABEL: three_fdiv_double:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fdiv d4, d1, d0
-; CHECK-GI-NEXT: fdiv d1, d2, d0
-; CHECK-GI-NEXT: fdiv d2, d3, d0
-; CHECK-GI-NEXT: fmov d0, d4
-; CHECK-GI-NEXT: b foo_3d
+; CHECK-LABEL: three_fdiv_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d4, #1.00000000
+; CHECK-NEXT: fdiv d4, d4, d0
+; CHECK-NEXT: fmul d0, d1, d4
+; CHECK-NEXT: fmul d1, d2, d4
+; CHECK-NEXT: fmul d2, d3, d4
+; CHECK-NEXT: b foo_3d
%div = fdiv arcp double %a, %D
%div1 = fdiv arcp double %b, %D
%div2 = fdiv arcp double %c, %D
@@ -60,22 +44,14 @@ define void @three_fdiv_double(double %D, double %a, double %b, double %c) {
}
define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-SD-LABEL: three_fdiv_4xfloat:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov v4.4s, #1.00000000
-; CHECK-SD-NEXT: fdiv v4.4s, v4.4s, v0.4s
-; CHECK-SD-NEXT: fmul v0.4s, v1.4s, v4.4s
-; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v4.4s
-; CHECK-SD-NEXT: fmul v2.4s, v3.4s, v4.4s
-; CHECK-SD-NEXT: b foo_3_4xf
-;
-; CHECK-GI-LABEL: three_fdiv_4xfloat:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fdiv v4.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: fdiv v1.4s, v2.4s, v0.4s
-; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v0.4s
-; CHECK-GI-NEXT: mov v0.16b, v4.16b
-; CHECK-GI-NEXT: b foo_3_4xf
+; CHECK-LABEL: three_fdiv_4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov v4.4s, #1.00000000
+; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
+; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
+; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
+; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
+; CHECK-NEXT: b foo_3_4xf
%div = fdiv arcp <4 x float> %a, %D
%div1 = fdiv arcp <4 x float> %b, %D
%div2 = fdiv arcp <4 x float> %c, %D
@@ -84,22 +60,14 @@ define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b,
}
define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-SD-LABEL: three_fdiv_2xdouble:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov v4.2d, #1.00000000
-; CHECK-SD-NEXT: fdiv v4.2d, v4.2d, v0.2d
-; CHECK-SD-NEXT: fmul v0.2d, v1.2d, v4.2d
-; CHECK-SD-NEXT: fmul v1.2d, v2.2d, v4.2d
-; CHECK-SD-NEXT: fmul v2.2d, v3.2d, v4.2d
-; CHECK-SD-NEXT: b foo_3_2xd
-;
-; CHECK-GI-LABEL: three_fdiv_2xdouble:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fdiv v4.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: fdiv v1.2d, v2.2d, v0.2d
-; CHECK-GI-NEXT: fdiv v2.2d, v3.2d, v0.2d
-; CHECK-GI-NEXT: mov v0.16b, v4.16b
-; CHECK-GI-NEXT: b foo_3_2xd
+; CHECK-LABEL: three_fdiv_2xdouble:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov v4.2d, #1.00000000
+; CHECK-NEXT: fdiv v4.2d, v4.2d, v0.2d
+; CHECK-NEXT: fmul v0.2d, v1.2d, v4.2d
+; CHECK-NEXT: fmul v1.2d, v2.2d, v4.2d
+; CHECK-NEXT: fmul v2.2d, v3.2d, v4.2d
+; CHECK-NEXT: b foo_3_2xd
%div = fdiv arcp <2 x double> %a, %D
%div1 = fdiv arcp <2 x double> %b, %D
%div2 = fdiv arcp <2 x double> %c, %D
@@ -135,26 +103,47 @@ define void @two_fdiv_double(double %D, double %a, double %b) {
ret void
}
-define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-SD-LABEL: splat_three_fdiv_4xfloat:
+define void @four_fdiv_multi_float(float %D, float %a, float %b, float %c) #0 {
+; CHECK-SD-LABEL: four_fdiv_multi_float:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: fmov v4.4s, #1.00000000
-; CHECK-SD-NEXT: dup v0.4s, v0.s[0]
-; CHECK-SD-NEXT: fdiv v4.4s, v4.4s, v0.4s
-; CHECK-SD-NEXT: fmul v0.4s, v1.4s, v4.4s
-; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v4.4s
-; CHECK-SD-NEXT: fmul v2.4s, v3.4s, v4.4s
-; CHECK-SD-NEXT: b foo_3_4xf
+; CHECK-SD-NEXT: fmov s4, #1.00000000
+; CHECK-SD-NEXT: fdiv s5, s4, s0
+; CHECK-SD-NEXT: fmul s4, s1, s5
+; CHECK-SD-NEXT: fmul s1, s2, s5
+; CHECK-SD-NEXT: fmul s2, s3, s5
+; CHECK-SD-NEXT: fmul s3, s0, s5
+; CHECK-SD-NEXT: fmov s0, s4
+; CHECK-SD-NEXT: b foo_4f
;
-; CHECK-GI-LABEL: splat_three_fdiv_4xfloat:
+; CHECK-GI-LABEL: four_fdiv_multi_float:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-GI-NEXT: dup v4.4s, v0.s[0]
-; CHECK-GI-NEXT: fdiv v0.4s, v1.4s, v4.4s
-; CHECK-GI-NEXT: fdiv v1.4s, v2.4s, v4.4s
-; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v4.4s
-; CHECK-GI-NEXT: b foo_3_4xf
+; CHECK-GI-NEXT: fmov s4, #1.00000000
+; CHECK-GI-NEXT: fdiv s5, s4, s0
+; CHECK-GI-NEXT: fdiv s4, s0, s0
+; CHECK-GI-NEXT: fmul s0, s1, s5
+; CHECK-GI-NEXT: fmul s1, s2, s5
+; CHECK-GI-NEXT: fmul s2, s3, s5
+; CHECK-GI-NEXT: fmov s3, s4
+; CHECK-GI-NEXT: b foo_4f
+ %div = fdiv arcp float %a, %D
+ %div1 = fdiv arcp float %b, %D
+ %div2 = fdiv arcp float %c, %D
+ %div3 = fdiv arcp float %D, %D
+ tail call void @foo_4f(float %div, float %div1, float %div2, float %div3)
+ ret void
+}
+
+define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; CHECK-LABEL: splat_three_fdiv_4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: fmov v4.4s, #1.00000000
+; CHECK-NEXT: dup v0.4s, v0.s[0]
+; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
+; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
+; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
+; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
+; CHECK-NEXT: b foo_3_4xf
%D.ins = insertelement <4 x float> poison, float %D, i64 0
%splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
%div = fdiv arcp <4 x float> %a, %splat
@@ -256,6 +245,7 @@ entry:
}
declare void @foo_3f(float, float, float)
+declare void @foo_4f(float, float, float, float)
declare void @foo_3d(double, double, double)
declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
index 594a3ab..be07978 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
@@ -38,10 +38,10 @@ define half @add_v2HalfH(<2 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_v2HalfH:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI1_0
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0]
; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[1]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
@@ -88,10 +88,10 @@ define half @add_v3HalfH(<3 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_v3HalfH:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI2_0
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -152,10 +152,10 @@ define half @add_HalfH(<4 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_HalfH:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI3_0
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI3_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -250,9 +250,9 @@ define half @add_H(<8 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_H:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI4_0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI4_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -448,9 +448,9 @@ define half @add_2H(<16 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_2H:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI7_0
; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: fmov s2, w8
+; CHECK-GI-NOFP16-NEXT: ldr h2, [x8, :lo12:.LCPI7_0]
; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
; CHECK-GI-NOFP16-NEXT: fadd s2, s2, s3
; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1]
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index 18f463c..40925da 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -405,26 +405,23 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
;
; CHECK-GI-NOFP16-LABEL: fadd_reduction_v4f16_in_loop:
; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: movi d0, #0000000000000000
; CHECK-GI-NOFP16-NEXT: mov x8, xzr
-; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
; CHECK-GI-NOFP16-NEXT: .LBB13_1: // %loop
; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NOFP16-NEXT: ldr d0, [x0, x8]
-; CHECK-GI-NOFP16-NEXT: fmov s1, w9
+; CHECK-GI-NOFP16-NEXT: ldr d1, [x0, x8]
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
; CHECK-GI-NOFP16-NEXT: add x8, x8, #8
; CHECK-GI-NOFP16-NEXT: cmp w8, #56
-; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: faddp v1.4s, v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: faddp s1, v1.2s
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
+; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fmov w9, s0
; CHECK-GI-NOFP16-NEXT: b.ne .LBB13_1
; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit
-; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fadd_reduction_v4f16_in_loop:
@@ -521,28 +518,25 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
;
; CHECK-GI-NOFP16-LABEL: fadd_reduction_v8f16_in_loop:
; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: movi d0, #0000000000000000
; CHECK-GI-NOFP16-NEXT: mov x8, xzr
-; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
; CHECK-GI-NOFP16-NEXT: .LBB14_1: // %loop
; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NOFP16-NEXT: ldr q0, [x0, x8]
+; CHECK-GI-NOFP16-NEXT: ldr q1, [x0, x8]
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
; CHECK-GI-NOFP16-NEXT: add x8, x8, #8
; CHECK-GI-NOFP16-NEXT: cmp w8, #56
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: fmov s1, w9
+; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: faddp v1.4s, v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: faddp s1, v1.2s
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
+; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fmov w9, s0
; CHECK-GI-NOFP16-NEXT: b.ne .LBB14_1
; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit
-; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fadd_reduction_v8f16_in_loop:
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
index e1b2170..c10d6e9 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
@@ -52,10 +52,10 @@ define half @mul_HalfH(<4 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: mul_HalfH:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI1_0
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -144,9 +144,9 @@ define half @mul_H(<8 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: mul_H:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI2_0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -321,9 +321,9 @@ define half @mul_2H(<16 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: mul_2H:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI5_0
; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: fmov s2, w8
+; CHECK-GI-NOFP16-NEXT: ldr h2, [x8, :lo12:.LCPI5_0]
; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
; CHECK-GI-NOFP16-NEXT: fmul s2, s2, s3
; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
index 2ad6e68..f730199 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
@@ -70,7 +70,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_def() {
define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[DEF:%.*]] = call i64 asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -118,7 +118,7 @@ define amdgpu_kernel void @kernel_uses_asm_physreg() {
define amdgpu_kernel void @kernel_uses_asm_physreg_tuple() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_tuple(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -154,7 +154,7 @@ define void @func_uses_asm_physreg_agpr() {
define void @func_uses_asm_physreg_agpr_tuple() {
; CHECK-LABEL: define void @func_uses_asm_physreg_agpr_tuple(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -168,7 +168,7 @@ declare void @unknown()
define amdgpu_kernel void @kernel_calls_extern() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -180,8 +180,8 @@ define amdgpu_kernel void @kernel_calls_extern() {
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]]
+; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-NEXT: call void @unknown() #[[ATTR29:[0-9]+]]
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
@@ -192,7 +192,7 @@ define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: call void [[INDIRECT]]()
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -204,8 +204,8 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]]
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR29]]
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
@@ -316,7 +316,7 @@ define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) {
define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
-; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
@@ -342,7 +342,7 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2]] {
; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -354,7 +354,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR5:[0-9]+]] {
; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -378,7 +378,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2() {
define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -390,7 +390,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2]] {
; CHECK-NEXT: [[DEF:%.*]] = call ptr asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -402,7 +402,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR5]] {
; CHECK-NEXT: [[DEF:%.*]] = call <2 x ptr> asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -414,7 +414,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR6:[0-9]+]] {
; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -426,7 +426,7 @@ define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
define amdgpu_kernel void @kernel_uses_asm_clobber() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR7:[0-9]+]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -438,7 +438,7 @@ define amdgpu_kernel void @kernel_uses_asm_clobber() {
define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_tuple(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR8:[0-9]+]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -450,7 +450,7 @@ define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_oob(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR9:[0-9]+]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -462,7 +462,7 @@ define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_max(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR9]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -474,7 +474,7 @@ define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_oob(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR9]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -486,7 +486,7 @@ define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR10:[0-9]+]] {
; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -498,7 +498,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR10]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -510,7 +510,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty() {
define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR10]] {
; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -522,7 +522,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty() {
define amdgpu_kernel void @vreg_use_exceeds_register_file() {
; CHECK-LABEL: define amdgpu_kernel void @vreg_use_exceeds_register_file(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR9]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -534,7 +534,7 @@ define amdgpu_kernel void @vreg_use_exceeds_register_file() {
define amdgpu_kernel void @vreg_def_exceeds_register_file() {
; CHECK-LABEL: define amdgpu_kernel void @vreg_def_exceeds_register_file(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR9]] {
; CHECK-NEXT: [[DEF:%.*]] = call <257 x i32> asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -546,7 +546,7 @@ define amdgpu_kernel void @vreg_def_exceeds_register_file() {
define amdgpu_kernel void @multiple() {
; CHECK-LABEL: define amdgpu_kernel void @multiple(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR10]] {
; CHECK-NEXT: [[DEF:%.*]] = call { <16 x i32>, <8 x i32>, <8 x i32> } asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -558,7 +558,7 @@ define amdgpu_kernel void @multiple() {
define amdgpu_kernel void @earlyclobber_0() {
; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_0(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR11:[0-9]+]] {
; CHECK-NEXT: [[DEF:%.*]] = call <8 x i32> asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -570,7 +570,7 @@ define amdgpu_kernel void @earlyclobber_0() {
define amdgpu_kernel void @earlyclobber_1() {
; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_1(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR12:[0-9]+]] {
; CHECK-NEXT: [[DEF:%.*]] = call { <8 x i32>, <16 x i32> } asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -582,7 +582,7 @@ define amdgpu_kernel void @earlyclobber_1() {
define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512() {
; CHECK-LABEL: define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR13:[0-9]+]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -594,7 +594,7 @@ define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512() {
define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512() {
; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR13]] {
; CHECK-NEXT: [[TMP1:%.*]] = call { i32, <8 x i32>, <16 x i32> } asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -606,7 +606,7 @@ define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512() {
define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() {
; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR14:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = call { i32, <16 x i32> } asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -618,7 +618,7 @@ define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() {
define amdgpu_kernel void @mixed_physreg_vreg_tuples_0() {
; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_0(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR11]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -630,7 +630,7 @@ define amdgpu_kernel void @mixed_physreg_vreg_tuples_0() {
define amdgpu_kernel void @mixed_physreg_vreg_tuples_1() {
; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_1(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR15:[0-9]+]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -642,7 +642,7 @@ define amdgpu_kernel void @mixed_physreg_vreg_tuples_1() {
define amdgpu_kernel void @physreg_raises_limit() {
; CHECK-LABEL: define amdgpu_kernel void @physreg_raises_limit(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR16:[0-9]+]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -652,10 +652,9 @@ define amdgpu_kernel void @physreg_raises_limit() {
ret void
}
-; FIXME: This should require 9. We cannot allocate an a128 at a0.
define amdgpu_kernel void @physreg_tuple_alignment_raises_limit() {
; CHECK-LABEL: define amdgpu_kernel void @physreg_tuple_alignment_raises_limit(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR11]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -667,7 +666,7 @@ define amdgpu_kernel void @physreg_tuple_alignment_raises_limit() {
define amdgpu_kernel void @align3_virtreg() {
; CHECK-LABEL: define amdgpu_kernel void @align3_virtreg(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR6]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -679,7 +678,7 @@ define amdgpu_kernel void @align3_virtreg() {
define amdgpu_kernel void @align3_align4_virtreg() {
; CHECK-LABEL: define amdgpu_kernel void @align3_align4_virtreg(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR15]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -691,7 +690,7 @@ define amdgpu_kernel void @align3_align4_virtreg() {
define amdgpu_kernel void @align2_align4_virtreg() {
; CHECK-LABEL: define amdgpu_kernel void @align2_align4_virtreg(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR15]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -703,7 +702,7 @@ define amdgpu_kernel void @align2_align4_virtreg() {
define amdgpu_kernel void @kernel_uses_write_register_a55() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55(
-; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR17:[0-9]+]] {
; CHECK-NEXT: call void @llvm.write_register.i32(metadata [[META0:![0-9]+]], i32 0)
; CHECK-NEXT: ret void
;
@@ -713,71 +712,313 @@ define amdgpu_kernel void @kernel_uses_write_register_a55() {
define amdgpu_kernel void @kernel_uses_write_register_v55() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55(
-; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @llvm.write_register.i32(metadata [[META1:![0-9]+]], i32 0)
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void @llvm.write_register.i64(metadata !1, i32 0)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57(
-; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-SAME: ) #[[ATTR18:[0-9]+]] {
; CHECK-NEXT: call void @llvm.write_register.i96(metadata [[META2:![0-9]+]], i96 0)
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void @llvm.write_register.i64(metadata !2, i96 0)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR19:[0-9]+]] {
; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]])
; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%reg = call i32 @llvm.read_register.i64(metadata !0)
store i32 %reg, ptr addrspace(1) %ptr
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR19]] {
; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata [[META0]])
; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%reg = call i32 @llvm.read_volatile_register.i64(metadata !0)
store i32 %reg, ptr addrspace(1) %ptr
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR20:[0-9]+]] {
; CHECK-NEXT: [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata [[META3:![0-9]+]])
; CHECK-NEXT: store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%reg = call i128 @llvm.read_register.i64(metadata !3)
store i128 %reg, ptr addrspace(1) %ptr
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256(
-; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-SAME: ) #[[ATTR9]] {
; CHECK-NEXT: call void @llvm.write_register.i32(metadata [[META4:![0-9]+]], i32 0)
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void @llvm.write_register.i64(metadata !4, i32 0)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_multiple_uses() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_multiple_uses(
+; CHECK-SAME: ) #[[ATTR5]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(i64 poison)
+ call void asm sideeffect "; use $0", "a"(i32 poison)
+ call void asm sideeffect "; use $0", "a"(i128 poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_multiple_defs() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_multiple_defs(
+; CHECK-SAME: ) #[[ATTR5]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 asm sideeffect "
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 asm sideeffect "
+; CHECK-NEXT: [[TMP3:%.*]] = call i128 asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call i64 asm sideeffect "; def $0", "=a"()
+ call i32 asm sideeffect "; def $0", "=a"()
+ call i128 asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_multiple_use_defs() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_multiple_use_defs(
+; CHECK-SAME: ) #[[ATTR5]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: [[TMP1:%.*]] = call i128 asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(i32 poison)
+ call i128 asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define void @callgraph_b() {
+; CHECK-LABEL: define void @callgraph_b(
+; CHECK-SAME: ) #[[ATTR15]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> asm sideeffect "
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call <4 x i32> asm sideeffect "; def $0", "=a"()
+ call void asm sideeffect "; use $0", "a"(<8 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define void @callgraph_c() {
+; CHECK-LABEL: define void @callgraph_c(
+; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 asm sideeffect "
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call i32 asm sideeffect "; def $0", "=a"()
+ call void asm sideeffect "; use $0", "a"(<2 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define void @callgraph_a(i1 %cond) {
+; CHECK-LABEL: define void @callgraph_a(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR15]] {
+; CHECK-NEXT: br i1 [[COND]], label [[A:%.*]], label [[B:%.*]]
+; CHECK: a:
+; CHECK-NEXT: call void @callgraph_b()
+; CHECK-NEXT: ret void
+; CHECK: b:
+; CHECK-NEXT: call void @callgraph_c()
+; CHECK-NEXT: ret void
+;
+ br i1 %cond, label %a, label %b
+
+a:
+ call void @callgraph_b()
+ ret void
+
+b:
+ call void @callgraph_c()
+ ret void
+}
+
+
+define void @kernel_max_callgraph(i1 %cond) {
+; CHECK-LABEL: define void @kernel_max_callgraph(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR15]] {
+; CHECK-NEXT: call void @callgraph_a(i1 [[COND]])
+; CHECK-NEXT: ret void
+;
+ call void @callgraph_a(i1 %cond)
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_all_virtregs() #1 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_all_virtregs(
+; CHECK-SAME: ) #[[ATTR21:[0-9]+]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a,a,a,a,a,a,a,a"(<32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1() #1 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1(
+; CHECK-SAME: ) #[[ATTR21]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a,a,a,a,a,a,a,a,a"(<32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, i32 poison)
+ call void @use_most()
+ ret void
+}
+
+define void @recursive() {
+; CHECK-LABEL: define void @recursive(
+; CHECK-SAME: ) #[[ATTR22:[0-9]+]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: call void @recursive()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(<7 x i32> poison)
+ call void @use_most()
+ call void @recursive()
+ ret void
+}
+
+define void @indirect_0() {
+; CHECK-LABEL: define void @indirect_0(
+; CHECK-SAME: ) #[[ATTR22]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(<7 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define void @indirect_1() {
+; CHECK-LABEL: define void @indirect_1(
+; CHECK-SAME: ) #[[ATTR23:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call <3 x i32> asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @knowable_indirect_call(i1 %cond) {
+; CHECK-LABEL: define amdgpu_kernel void @knowable_indirect_call(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR22]] {
+; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @indirect_0, ptr @indirect_1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @indirect_1
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
+; CHECK: 2:
+; CHECK-NEXT: call void @indirect_1()
+; CHECK-NEXT: br label [[TMP6:%.*]]
+; CHECK: 3:
+; CHECK-NEXT: br i1 true, label [[TMP4:%.*]], label [[TMP5:%.*]]
+; CHECK: 4:
+; CHECK-NEXT: call void @indirect_0()
+; CHECK-NEXT: br label [[TMP6]]
+; CHECK: 5:
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %fptr = select i1 %cond, ptr @indirect_0, ptr @indirect_1
+ call void %fptr()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @calls_poison(i1 %cond) {
+; CHECK-LABEL: define amdgpu_kernel void @calls_poison(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: call void poison()
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void poison()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @calls_null(i1 %cond) {
+; CHECK-LABEL: define amdgpu_kernel void @calls_null(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: call void null()
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void null()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @indirect_unknown(ptr %fptr) {
+; CHECK-LABEL: define amdgpu_kernel void @indirect_unknown(
+; CHECK-SAME: ptr [[FPTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: call void [[FPTR]]()
+; CHECK-NEXT: ret void
+;
+ call void %fptr()
ret void
}
attributes #0 = { "amdgpu-agpr-alloc"="0" }
+attributes #1 = { "amdgpu-waves-per-eu"="1,1" }
!0 = !{!"a55"}
!1 = !{!"v55"}
@@ -787,16 +1028,35 @@ attributes #0 = { "amdgpu-agpr-alloc"="0" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR8:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="1" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="2" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="4" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="6" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-agpr-alloc"="5" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-agpr-alloc"="14" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR9]] = { "amdgpu-agpr-alloc"="256" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="32" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR11]] = { "amdgpu-agpr-alloc"="9" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR12]] = { "amdgpu-agpr-alloc"="64" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR13]] = { "amdgpu-agpr-alloc"="49" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR14]] = { "amdgpu-agpr-alloc"="33" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR15]] = { "amdgpu-agpr-alloc"="8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR16]] = { "amdgpu-agpr-alloc"="13" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR17]] = { "amdgpu-agpr-alloc"="56" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR18]] = { "amdgpu-agpr-alloc"="58" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR19]] = { "amdgpu-agpr-alloc"="56" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR20]] = { "amdgpu-agpr-alloc"="60" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR21]] = { "amdgpu-agpr-alloc"="256" "amdgpu-waves-per-eu"="1,1" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR22]] = { "amdgpu-agpr-alloc"="7" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR23]] = { "amdgpu-agpr-alloc"="3" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR24:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR25:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR26:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR27:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR28:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR29]] = { "amdgpu-agpr-alloc"="0" }
;.
; CHECK: [[META0]] = !{!"a55"}
; CHECK: [[META1]] = !{!"v55"}
diff --git a/llvm/test/CodeGen/AMDGPU/fsub.ll b/llvm/test/CodeGen/AMDGPU/fsub.ll
index 743431c..d6a9cb1 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub.ll
@@ -92,43 +92,11 @@ define amdgpu_kernel void @v_fneg_fsub_nsz_f32(ptr addrspace(1) %out, ptr addrsp
ret void
}
-; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_attribute_f32:
-; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
-; SI-NOT: xor
-define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
- %a = load float, ptr addrspace(1) %in, align 4
- %b = load float, ptr addrspace(1) %b_ptr, align 4
- %result = fsub float %a, %b
- %neg.result = fsub float -0.0, %result
- store float %neg.result, ptr addrspace(1) %out, align 4
- ret void
-}
-
-; For some reason the attribute has a string "true" or "false", so
-; make sure it is disabled and the fneg is not folded if it is not
-; "true".
-; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_false_attribute_f32:
-; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
-; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
-define amdgpu_kernel void @v_fneg_fsub_nsz_false_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
- %a = load float, ptr addrspace(1) %in, align 4
- %b = load float, ptr addrspace(1) %b_ptr, align 4
- %result = fsub float %a, %b
- %neg.result = fsub float -0.0, %result
- store float %neg.result, ptr addrspace(1) %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_fsub_0_nsz_attribute_f32:
+; FUNC-LABEL: {{^}}v_fsub_0_nsz_flag_f32:
; SI-NOT: v_sub
-define amdgpu_kernel void @v_fsub_0_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_fsub_0_nsz_flag_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%a = load float, ptr addrspace(1) %in, align 4
- %result = fsub float %a, 0.0
+ %result = fsub nsz float %a, 0.0
store float %result, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }
-attributes #1 = { nounwind "no-signed-zeros-fp-math"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/mad_int24.ll b/llvm/test/CodeGen/AMDGPU/mad_int24.ll
index 93fda94..dd88310 100644
--- a/llvm/test/CodeGen/AMDGPU/mad_int24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_int24.ll
@@ -1,17 +1,79 @@
-; RUN: llc < %s -mtriple=amdgcn | FileCheck %s --check-prefix=GCN --check-prefix=FUNC
-; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefix=GCN --check-prefix=FUNC
-; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=amdgcn| FileCheck %s --check-prefixes=GCN
+; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefixes=VI
+; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefixes=EG,R600,RW
+; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s --check-prefixes=EG,R600,CM
-; FUNC-LABEL: {{^}}i32_mad24:
; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
-; EG: MULLO_INT
-; CM: MULLO_INT
-; GCN: s_bfe_i32
-; GCN: s_bfe_i32
-; GCN: s_mul_i32
-; GCN: s_add_i32
define amdgpu_kernel void @i32_mad24(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) {
+; GCN-LABEL: i32_mad24:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_bfe_i32 s0, s0, 0x180000
+; GCN-NEXT: s_bfe_i32 s1, s1, 0x180000
+; GCN-NEXT: s_mul_i32 s0, s0, s1
+; GCN-NEXT: s_add_i32 s0, s0, s2
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
+;
+; VI-LABEL: i32_mad24:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
+; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
+; VI-NEXT: s_mov_b32 s7, 0xf000
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_bfe_i32 s0, s0, 0x180000
+; VI-NEXT: s_bfe_i32 s1, s1, 0x180000
+; VI-NEXT: s_mul_i32 s0, s0, s1
+; VI-NEXT: s_add_i32 s0, s0, s2
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: s_endpgm
+;
+; RW-LABEL: i32_mad24:
+; RW: ; %bb.0: ; %entry
+; RW-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[]
+; RW-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; RW-NEXT: CF_END
+; RW-NEXT: PAD
+; RW-NEXT: ALU clause starting at 4:
+; RW-NEXT: LSHL T0.W, KC0[2].Z, literal.x,
+; RW-NEXT: LSHL * T1.W, KC0[2].W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: ASHR T1.W, PS, literal.x,
+; RW-NEXT: ASHR * T0.W, PV.W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: MULLO_INT * T0.X, PS, PV.W,
+; RW-NEXT: ADD_INT T0.X, PS, KC0[3].X,
+; RW-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; RW-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: i32_mad24:
+; CM: ; %bb.0: ; %entry
+; CM-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: LSHL T0.Z, KC0[2].Z, literal.x,
+; CM-NEXT: LSHL * T0.W, KC0[2].W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: ASHR T1.Z, PV.W, literal.x,
+; CM-NEXT: ASHR * T0.W, PV.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X, T0.W, T1.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T1.Z,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T1.Z,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T1.Z,
+; CM-NEXT: ADD_INT * T0.X, PV.X, KC0[3].X,
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
%0 = shl i32 %a, 8
%a_24 = ashr i32 %0, 8
@@ -23,13 +85,25 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}mad24_known_bits_destroyed:
-; GCN: s_waitcnt
-; GCN-NEXT: v_mad_i32_i24
-; GCN-NEXT: v_mul_i32_i24
-; GCN-NEXT: s_setpc_b64
define i32 @mad24_known_bits_destroyed(i32 %a, i32 %b, i32 %c) {
-
+; GCN-LABEL: mad24_known_bits_destroyed:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mad_i32_i24 v1, v0, v1, v2
+; GCN-NEXT: v_mul_i32_i24_e32 v0, v1, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: mad24_known_bits_destroyed:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_mad_i32_i24 v1, v0, v1, v2
+; VI-NEXT: v_mul_i32_i24_e32 v0, v1, v0
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; EG-LABEL: mad24_known_bits_destroyed:
+; EG: ; %bb.0:
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
%shl.0 = shl i32 %a, 8
%sra.0 = ashr i32 %shl.0, 8
%shl.1 = shl i32 %b, 8
@@ -48,12 +122,25 @@ define i32 @mad24_known_bits_destroyed(i32 %a, i32 %b, i32 %c) {
ret i32 %mul1
}
-; GCN-LABEL: {{^}}mad24_intrin_known_bits_destroyed:
-; GCN: s_waitcnt
-; GCN-NEXT: v_mad_i32_i24
-; GCN-NEXT: v_mul_i32_i24
-; GCN-NEXT: s_setpc_b64
define i32 @mad24_intrin_known_bits_destroyed(i32 %a, i32 %b, i32 %c) {
+; GCN-LABEL: mad24_intrin_known_bits_destroyed:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mad_i32_i24 v1, v0, v1, v2
+; GCN-NEXT: v_mul_i32_i24_e32 v0, v1, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: mad24_intrin_known_bits_destroyed:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_mad_i32_i24 v1, v0, v1, v2
+; VI-NEXT: v_mul_i32_i24_e32 v0, v1, v0
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; EG-LABEL: mad24_intrin_known_bits_destroyed:
+; EG: ; %bb.0:
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
%shl.0 = shl i32 %a, 8
%sra.0 = ashr i32 %shl.0, 8
%shl.1 = shl i32 %b, 8
@@ -73,17 +160,177 @@ define i32 @mad24_intrin_known_bits_destroyed(i32 %a, i32 %b, i32 %c) {
}
; Make sure no unnecessary BFEs are emitted in the loop.
-; GCN-LABEL: {{^}}mad24_destroyed_knownbits_2:
-; GCN-NOT: v_bfe
-; GCN: v_mad_i32_i24
-; GCN-NOT: v_bfe
-; GCN: v_mad_i32_i24
-; GCN-NOT: v_bfe
-; GCN: v_mad_i32_i24
-; GCN-NOT: v_bfe
-; GCN: v_mad_i32_i24
-; GCN-NOT: v_bfe
define void @mad24_destroyed_knownbits_2(i32 %arg, i32 %arg1, i32 %arg2, ptr addrspace(1) %arg3) {
+; GCN-LABEL: mad24_destroyed_knownbits_2:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v5, 1
+; GCN-NEXT: s_mov_b64 s[4:5], 0
+; GCN-NEXT: .LBB3_1: ; %bb6
+; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT: v_mad_i32_i24 v0, v0, v5, v5
+; GCN-NEXT: v_add_i32_e32 v1, vcc, -1, v1
+; GCN-NEXT: v_mad_i32_i24 v5, v0, v5, v0
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GCN-NEXT: v_mad_i32_i24 v0, v5, v0, v5
+; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT: v_mad_i32_i24 v0, v0, v5, v0
+; GCN-NEXT: v_mov_b32_e32 v5, v2
+; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]
+; GCN-NEXT: s_cbranch_execnz .LBB3_1
+; GCN-NEXT: ; %bb.2: ; %bb5
+; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s4, s6
+; GCN-NEXT: s_mov_b32 s5, s6
+; GCN-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: mad24_destroyed_knownbits_2:
+; VI: ; %bb.0: ; %bb
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v5, 1
+; VI-NEXT: s_mov_b64 s[4:5], 0
+; VI-NEXT: .LBB3_1: ; %bb6
+; VI-NEXT: ; =>This Inner Loop Header: Depth=1
+; VI-NEXT: v_mad_i32_i24 v0, v0, v5, v5
+; VI-NEXT: v_mad_i32_i24 v5, v0, v5, v0
+; VI-NEXT: v_add_u32_e32 v1, vcc, -1, v1
+; VI-NEXT: v_mad_i32_i24 v0, v5, v0, v5
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_mad_i32_i24 v0, v0, v5, v0
+; VI-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; VI-NEXT: v_mov_b32_e32 v5, v2
+; VI-NEXT: s_andn2_b64 exec, exec, s[4:5]
+; VI-NEXT: s_cbranch_execnz .LBB3_1
+; VI-NEXT: ; %bb.2: ; %bb5
+; VI-NEXT: s_or_b64 exec, exec, s[4:5]
+; VI-NEXT: flat_store_dword v[3:4], v0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; RW-LABEL: mad24_destroyed_knownbits_2:
+; RW: ; %bb.0: ; %bb
+; RW-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[]
+; RW-NEXT: LOOP_START_DX10 @7
+; RW-NEXT: ALU_PUSH_BEFORE 30, @16, KC0[], KC1[]
+; RW-NEXT: JUMP @6 POP:1
+; RW-NEXT: LOOP_BREAK @6
+; RW-NEXT: POP @6 POP:1
+; RW-NEXT: END_LOOP @2
+; RW-NEXT: ALU 1, @47, KC0[], KC1[]
+; RW-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; RW-NEXT: CF_END
+; RW-NEXT: ALU clause starting at 10:
+; RW-NEXT: MOV T0.X, KC0[2].Y,
+; RW-NEXT: MOV T0.Y, KC0[2].Z,
+; RW-NEXT: MOV * T0.Z, KC0[2].W,
+; RW-NEXT: MOV T0.W, KC0[3].X,
+; RW-NEXT: MOV * T1.W, literal.x,
+; RW-NEXT: 1(1.401298e-45), 0(0.000000e+00)
+; RW-NEXT: ALU clause starting at 16:
+; RW-NEXT: LSHL T2.W, T1.W, literal.x,
+; RW-NEXT: LSHL * T3.W, T0.X, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: ASHR T3.W, PS, literal.x,
+; RW-NEXT: ASHR * T2.W, PV.W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: MULLO_INT * T0.X, PV.W, PS,
+; RW-NEXT: ADD_INT * T1.W, PS, T1.W,
+; RW-NEXT: LSHL * T3.W, PV.W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: ASHR * T3.W, PV.W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: MULLO_INT * T0.X, PV.W, T2.W,
+; RW-NEXT: ADD_INT * T1.W, PS, T1.W,
+; RW-NEXT: LSHL * T2.W, PV.W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: ASHR * T2.W, PV.W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: MULLO_INT * T0.X, PV.W, T3.W,
+; RW-NEXT: ADD_INT * T1.W, PS, T1.W,
+; RW-NEXT: LSHL * T3.W, PV.W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: ASHR * T3.W, PV.W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: ADD_INT T0.Y, T0.Y, literal.x,
+; RW-NEXT: MULLO_INT * T0.X, PV.W, T2.W,
+; RW-NEXT: -1(nan), 0(0.000000e+00)
+; RW-NEXT: ADD_INT T0.X, PS, T1.W,
+; RW-NEXT: SETE_INT T2.W, PV.Y, 0.0,
+; RW-NEXT: MOV * T1.W, T0.Z,
+; RW-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+; RW-NEXT: ALU clause starting at 47:
+; RW-NEXT: LSHR * T1.X, T0.W, literal.x,
+; RW-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: mad24_destroyed_knownbits_2:
+; CM: ; %bb.0: ; %bb
+; CM-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: LOOP_START_DX10 @7
+; CM-NEXT: ALU_PUSH_BEFORE 41, @16, KC0[], KC1[]
+; CM-NEXT: JUMP @6 POP:1
+; CM-NEXT: LOOP_BREAK @6
+; CM-NEXT: POP @6 POP:1
+; CM-NEXT: END_LOOP @2
+; CM-NEXT: ALU 1, @58, KC0[], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CM-NEXT: CF_END
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: MOV * T1.X, KC0[2].Y,
+; CM-NEXT: MOV T0.X, KC0[2].Z,
+; CM-NEXT: MOV T0.Y, KC0[2].W,
+; CM-NEXT: MOV T0.Z, KC0[3].X,
+; CM-NEXT: MOV * T0.W, literal.x,
+; CM-NEXT: 1(1.401298e-45), 0(0.000000e+00)
+; CM-NEXT: ALU clause starting at 16:
+; CM-NEXT: LSHL T1.Z, T0.W, literal.x,
+; CM-NEXT: LSHL * T1.W, T1.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: ASHR T2.Z, PV.W, literal.x,
+; CM-NEXT: ASHR * T1.W, PV.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T1.X, T2.Z, T1.W,
+; CM-NEXT: MULLO_INT T1.Y (MASKED), T2.Z, T1.W,
+; CM-NEXT: MULLO_INT T1.Z (MASKED), T2.Z, T1.W,
+; CM-NEXT: MULLO_INT * T1.W (MASKED), T2.Z, T1.W,
+; CM-NEXT: ADD_INT * T0.W, PV.X, T0.W,
+; CM-NEXT: LSHL * T2.W, PV.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: ASHR * T2.W, PV.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T1.X, T2.W, T1.W,
+; CM-NEXT: MULLO_INT T1.Y (MASKED), T2.W, T1.W,
+; CM-NEXT: MULLO_INT T1.Z (MASKED), T2.W, T1.W,
+; CM-NEXT: MULLO_INT * T1.W (MASKED), T2.W, T1.W,
+; CM-NEXT: ADD_INT * T0.W, PV.X, T0.W,
+; CM-NEXT: LSHL * T1.W, PV.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: ASHR * T1.W, PV.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T1.X, T1.W, T2.W,
+; CM-NEXT: MULLO_INT T1.Y (MASKED), T1.W, T2.W,
+; CM-NEXT: MULLO_INT T1.Z (MASKED), T1.W, T2.W,
+; CM-NEXT: MULLO_INT * T1.W (MASKED), T1.W, T2.W,
+; CM-NEXT: ADD_INT * T0.W, PV.X, T0.W,
+; CM-NEXT: LSHL * T2.W, PV.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T0.X, T0.X, literal.x,
+; CM-NEXT: ASHR * T2.W, PV.W, literal.y,
+; CM-NEXT: -1(nan), 8(1.121039e-44)
+; CM-NEXT: MULLO_INT T1.X, T2.W, T1.W,
+; CM-NEXT: MULLO_INT T1.Y (MASKED), T2.W, T1.W,
+; CM-NEXT: MULLO_INT T1.Z (MASKED), T2.W, T1.W,
+; CM-NEXT: MULLO_INT * T1.W (MASKED), T2.W, T1.W,
+; CM-NEXT: ADD_INT T1.X, PV.X, T0.W,
+; CM-NEXT: SETE_INT T1.Z, T0.X, 0.0,
+; CM-NEXT: MOV * T0.W, T0.Y,
+; CM-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.Z, 0.0,
+; CM-NEXT: ALU clause starting at 58:
+; CM-NEXT: LSHR * T0.X, T0.Z, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
bb:
br label %bb6
@@ -119,3 +366,5 @@ bb6: ; preds = %bb6, %bb
}
declare i32 @llvm.amdgcn.mul.i24(i32, i32)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; R600: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
index a6d458e..46b8df4 100644
--- a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
@@ -1,19 +1,75 @@
-; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -mtriple=amdgcn | FileCheck %s --check-prefix=SI --check-prefix=FUNC --check-prefix=GCN
-; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2
-; RUN: llc < %s -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefixes=EG
+; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s --check-prefixes=CM
+; RUN: llc < %s -mtriple=amdgcn | FileCheck %s --check-prefixes=GCN
+; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefixes=GFX8,SI
+; RUN: llc < %s -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global | FileCheck %s --check-prefixes=GFX8,VI
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-; FUNC-LABEL: {{^}}u32_mad24:
-; EG: MULLO_INT
-; SI: s_mul_i32
-; SI: s_add_i32
-; VI: s_mul_{{[iu]}}32
-; VI: s_add_{{[iu]}}32
-
define amdgpu_kernel void @u32_mad24(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) {
+; EG-LABEL: u32_mad24:
+; EG: ; %bb.0: ; %entry
+; EG-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: ALU clause starting at 4:
+; EG-NEXT: AND_INT T0.W, KC0[2].W, literal.x,
+; EG-NEXT: AND_INT * T1.W, KC0[2].Z, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: MULLO_INT * T0.X, PS, PV.W,
+; EG-NEXT: ADD_INT T0.X, PS, KC0[3].X,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: u32_mad24:
+; CM: ; %bb.0: ; %entry
+; CM-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
+; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X, T0.W, T0.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T0.Z,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T0.Z,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T0.Z,
+; CM-NEXT: ADD_INT * T0.X, PV.X, KC0[3].X,
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: u32_mad24:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_and_b32 s0, s0, 0xffffff
+; GCN-NEXT: s_and_b32 s1, s1, 0xffffff
+; GCN-NEXT: s_mul_i32 s0, s0, s1
+; GCN-NEXT: s_add_i32 s0, s0, s2
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
+;
+; GFX8-LABEL: u32_mad24:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 s7, 0xf000
+; GFX8-NEXT: s_mov_b32 s6, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_and_b32 s0, s0, 0xffffff
+; GFX8-NEXT: s_and_b32 s1, s1, 0xffffff
+; GFX8-NEXT: s_mul_i32 s0, s0, s1
+; GFX8-NEXT: s_add_i32 s0, s0, s2
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX8-NEXT: s_endpgm
entry:
%0 = shl i32 %a, 8
%a_24 = lshr i32 %0, 8
@@ -25,18 +81,88 @@ entry:
ret void
}
-; FUNC-LABEL: {{^}}i16_mad24:
; The order of A and B does not matter.
-; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 16
-; GCN: s_mul_i32 [[MUL:s[0-9]]], {{[s][0-9], [s][0-9]}}
-; GCN: s_add_i32 [[MAD:s[0-9]]], [[MUL]], s{{[0-9]}}
-; GCN: s_sext_i32_i16 [[EXT:s[0-9]]], [[MAD]]
-; GCN: v_mov_b32_e32 v0, [[EXT]]
define amdgpu_kernel void @i16_mad24(ptr addrspace(1) %out, i16 %a, i16 %b, i16 %c) {
+; EG-LABEL: i16_mad24:
+; EG: ; %bb.0: ; %entry
+; EG-NEXT: ALU 0, @12, KC0[], KC1[]
+; EG-NEXT: TEX 2 @6
+; EG-NEXT: ALU 4, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 40, #3
+; EG-NEXT: VTX_READ_16 T2.X, T0.X, 42, #3
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 13:
+; EG-NEXT: MULLO_INT * T0.Y, T1.X, T2.X,
+; EG-NEXT: ADD_INT * T0.W, PS, T0.X,
+; EG-NEXT: BFE_INT T0.X, PV.W, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
+;
+; CM-LABEL: i16_mad24:
+; CM: ; %bb.0: ; %entry
+; CM-NEXT: ALU 0, @12, KC0[], KC1[]
+; CM-NEXT: TEX 2 @6
+; CM-NEXT: ALU 8, @13, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 6:
+; CM-NEXT: VTX_READ_16 T1.X, T0.X, 40, #3
+; CM-NEXT: VTX_READ_16 T2.X, T0.X, 42, #3
+; CM-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3
+; CM-NEXT: ALU clause starting at 12:
+; CM-NEXT: MOV * T0.X, 0.0,
+; CM-NEXT: ALU clause starting at 13:
+; CM-NEXT: MULLO_INT T0.X (MASKED), T1.X, T2.X,
+; CM-NEXT: MULLO_INT T0.Y, T1.X, T2.X,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T1.X, T2.X,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T1.X, T2.X,
+; CM-NEXT: ADD_INT * T0.W, PV.Y, T0.X,
+; CM-NEXT: BFE_INT * T0.X, PV.W, 0.0, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: i16_mad24:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_load_dword s4, s[4:5], 0xb
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_lshr_b32 s2, s2, 16
+; GCN-NEXT: s_mul_i32 s2, s4, s2
+; GCN-NEXT: s_add_i32 s2, s2, s3
+; GCN-NEXT: s_sext_i32_i16 s2, s2
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s5, s1
+; GCN-NEXT: v_mov_b32_e32 v0, s2
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
+;
+; GFX8-LABEL: i16_mad24:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8-NEXT: s_load_dword s8, s[4:5], 0x2c
+; GFX8-NEXT: s_mov_b32 s7, 0xf000
+; GFX8-NEXT: s_mov_b32 s6, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_mov_b32 s4, s0
+; GFX8-NEXT: s_lshr_b32 s0, s2, 16
+; GFX8-NEXT: s_mul_i32 s0, s8, s0
+; GFX8-NEXT: s_add_i32 s0, s0, s3
+; GFX8-NEXT: s_sext_i32_i16 s0, s0
+; GFX8-NEXT: s_mov_b32 s5, s1
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX8-NEXT: s_endpgm
entry:
%0 = mul i16 %a, %b
%1 = add i16 %0, %c
@@ -46,17 +172,85 @@ entry:
}
; FIXME: Need to handle non-uniform case for function below (load without gep).
-; FUNC-LABEL: {{^}}i8_mad24:
-; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 8
-; GCN: s_mul_i32 [[MUL:s[0-9]]], {{[s][0-9], [s][0-9]}}
-; GCN: s_add_i32 [[MAD:s[0-9]]], [[MUL]], s{{[0-9]}}
-; GCN: s_sext_i32_i8 [[EXT:s[0-9]]], [[MAD]]
-; GCN: v_mov_b32_e32 v0, [[EXT]]
define amdgpu_kernel void @i8_mad24(ptr addrspace(1) %out, i8 %a, i8 %b, i8 %c) {
+; EG-LABEL: i8_mad24:
+; EG: ; %bb.0: ; %entry
+; EG-NEXT: ALU 0, @12, KC0[], KC1[]
+; EG-NEXT: TEX 2 @6
+; EG-NEXT: ALU 4, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_8 T1.X, T0.X, 40, #3
+; EG-NEXT: VTX_READ_8 T2.X, T0.X, 41, #3
+; EG-NEXT: VTX_READ_8 T0.X, T0.X, 42, #3
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 13:
+; EG-NEXT: MULLO_INT * T0.Y, T1.X, T2.X,
+; EG-NEXT: ADD_INT * T0.W, PS, T0.X,
+; EG-NEXT: BFE_INT T0.X, PV.W, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45)
+;
+; CM-LABEL: i8_mad24:
+; CM: ; %bb.0: ; %entry
+; CM-NEXT: ALU 0, @12, KC0[], KC1[]
+; CM-NEXT: TEX 2 @6
+; CM-NEXT: ALU 8, @13, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 6:
+; CM-NEXT: VTX_READ_8 T1.X, T0.X, 40, #3
+; CM-NEXT: VTX_READ_8 T2.X, T0.X, 41, #3
+; CM-NEXT: VTX_READ_8 T0.X, T0.X, 42, #3
+; CM-NEXT: ALU clause starting at 12:
+; CM-NEXT: MOV * T0.X, 0.0,
+; CM-NEXT: ALU clause starting at 13:
+; CM-NEXT: MULLO_INT T0.X (MASKED), T1.X, T2.X,
+; CM-NEXT: MULLO_INT T0.Y, T1.X, T2.X,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T1.X, T2.X,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T1.X, T2.X,
+; CM-NEXT: ADD_INT * T0.W, PV.Y, T0.X,
+; CM-NEXT: BFE_INT * T0.X, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: i8_mad24:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dword s2, s[4:5], 0xb
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_lshr_b32 s4, s2, 8
+; GCN-NEXT: s_lshr_b32 s5, s2, 16
+; GCN-NEXT: s_mul_i32 s2, s2, s4
+; GCN-NEXT: s_add_i32 s2, s2, s5
+; GCN-NEXT: s_sext_i32_i8 s4, s2
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; GFX8-LABEL: i8_mad24:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_lshr_b32 s4, s6, 8
+; GFX8-NEXT: s_lshr_b32 s5, s6, 16
+; GFX8-NEXT: s_mul_i32 s4, s6, s4
+; GFX8-NEXT: s_add_i32 s4, s4, s5
+; GFX8-NEXT: s_sext_i32_i8 s4, s4
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
entry:
%0 = mul i8 %a, %b
%1 = add i8 %0, %c
@@ -72,11 +266,75 @@ entry:
; 24-bit mad pattern wasn't being matched.
; Check that the select instruction is not deleted.
-; FUNC-LABEL: {{^}}i24_i32_i32_mad:
-; EG: CNDE_INT
-; SI: s_cselect
-; GCN2: s_cselect
define amdgpu_kernel void @i24_i32_i32_mad(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %d) {
+; EG-LABEL: i24_i32_i32_mad:
+; EG: ; %bb.0: ; %entry
+; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: ALU clause starting at 4:
+; EG-NEXT: ASHR * T0.W, KC0[2].Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: CNDE_INT * T0.W, KC0[3].X, literal.x, PV.W,
+; EG-NEXT: 34(4.764415e-44), 0(0.000000e+00)
+; EG-NEXT: MULLO_INT * T0.X, PV.W, KC0[3].X,
+; EG-NEXT: ADD_INT T0.X, PS, KC0[3].Y,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: i24_i32_i32_mad:
+; CM: ; %bb.0: ; %entry
+; CM-NEXT: ALU 10, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: ASHR * T0.W, KC0[2].Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT * T0.W, KC0[3].X, literal.x, PV.W,
+; CM-NEXT: 34(4.764415e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X, T0.W, KC0[3].X,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, KC0[3].X,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, KC0[3].X,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, KC0[3].X,
+; CM-NEXT: ADD_INT * T0.X, PV.X, KC0[3].Y,
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: i24_i32_i32_mad:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dword s2, s[4:5], 0xb
+; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_ashr_i32 s2, s2, 8
+; GCN-NEXT: s_cmp_lg_u32 s6, 0
+; GCN-NEXT: s_cselect_b32 s2, s2, 34
+; GCN-NEXT: s_mul_i32 s2, s2, s6
+; GCN-NEXT: s_add_i32 s4, s2, s7
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; GFX8-LABEL: i24_i32_i32_mad:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dword s8, s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_ashr_i32 s4, s8, 8
+; GFX8-NEXT: s_cmp_lg_u32 s6, 0
+; GFX8-NEXT: s_cselect_b32 s4, s4, 34
+; GFX8-NEXT: s_mul_i32 s4, s4, s6
+; GFX8-NEXT: s_add_i32 s4, s4, s7
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
entry:
%0 = ashr i32 %a, 8
%1 = icmp ne i32 %c, 0
@@ -87,13 +345,139 @@ entry:
ret void
}
-; FUNC-LABEL: {{^}}extra_and:
-; SI-NOT: v_and
-; SI: s_mul_i32
-; SI: s_mul_i32
-; SI: s_add_i32
-; SI: s_add_i32
define amdgpu_kernel void @extra_and(ptr addrspace(1) %arg, i32 %arg2, i32 %arg3) {
+; EG-LABEL: extra_and:
+; EG: ; %bb.0: ; %bb
+; EG-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: LOOP_START_DX10 @7
+; EG-NEXT: ALU_PUSH_BEFORE 12, @16, KC0[], KC1[]
+; EG-NEXT: JUMP @6 POP:1
+; EG-NEXT: LOOP_BREAK @6
+; EG-NEXT: POP @6 POP:1
+; EG-NEXT: END_LOOP @2
+; EG-NEXT: ALU 1, @29, KC0[], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T1.W, literal.x,
+; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00)
+; EG-NEXT: MOV * T3.W, PV.W,
+; EG-NEXT: MOV T0.Z, KC0[2].Y,
+; EG-NEXT: MOV T0.W, KC0[2].Z,
+; EG-NEXT: MOV * T2.W, KC0[2].W,
+; EG-NEXT: ALU clause starting at 16:
+; EG-NEXT: AND_INT T1.W, T1.W, literal.x,
+; EG-NEXT: AND_INT * T4.W, T3.W, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: AND_INT T3.W, T3.W, literal.x,
+; EG-NEXT: MULLO_INT * T0.X, PS, PV.W,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: MULLO_INT * T0.Y, PV.W, T1.W,
+; EG-NEXT: ADD_INT T3.W, T2.W, PS,
+; EG-NEXT: ADD_INT * T1.W, T0.W, T0.X,
+; EG-NEXT: ADD_INT * T0.X, PS, PV.W,
+; EG-NEXT: SETNE_INT * T4.W, PV.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+; EG-NEXT: ALU clause starting at 29:
+; EG-NEXT: LSHR * T1.X, T0.Z, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: extra_and:
+; CM: ; %bb.0: ; %bb
+; CM-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: LOOP_START_DX10 @7
+; CM-NEXT: ALU_PUSH_BEFORE 17, @16, KC0[], KC1[]
+; CM-NEXT: JUMP @6 POP:1
+; CM-NEXT: LOOP_BREAK @6
+; CM-NEXT: POP @6 POP:1
+; CM-NEXT: END_LOOP @2
+; CM-NEXT: ALU 1, @34, KC0[], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: MOV * T0.W, literal.x,
+; CM-NEXT: 0(0.000000e+00), 0(0.000000e+00)
+; CM-NEXT: MOV * T1.Z, PV.W,
+; CM-NEXT: MOV T0.Y, KC0[2].Y,
+; CM-NEXT: MOV T0.Z, KC0[2].Z,
+; CM-NEXT: MOV * T1.W, KC0[2].W,
+; CM-NEXT: ALU clause starting at 16:
+; CM-NEXT: AND_INT T1.Y, T1.Z, literal.x,
+; CM-NEXT: AND_INT T2.Z, T0.W, literal.x,
+; CM-NEXT: AND_INT * T0.W, T1.Z, literal.x,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X, T0.W, T2.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T2.Z,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T2.Z,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T2.Z,
+; CM-NEXT: MULLO_INT T0.X (MASKED), T1.Y, T2.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T1.Y, T2.Z,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T1.Y, T2.Z,
+; CM-NEXT: MULLO_INT * T0.W, T1.Y, T2.Z,
+; CM-NEXT: ADD_INT T1.Z, T1.W, PV.W,
+; CM-NEXT: ADD_INT * T0.W, T0.Z, T0.X,
+; CM-NEXT: ADD_INT * T0.X, PV.W, PV.Z,
+; CM-NEXT: SETNE_INT * T2.W, PV.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+; CM-NEXT: ALU clause starting at 34:
+; CM-NEXT: LSHR * T1.X, T0.Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: extra_and:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
+; GCN-NEXT: s_mov_b32 s2, 0
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: .LBB4_1: ; %bb4
+; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT: s_and_b32 s3, s6, 0xffffff
+; GCN-NEXT: s_and_b32 s6, s6, 0xffffff
+; GCN-NEXT: s_and_b32 s2, s2, 0xffffff
+; GCN-NEXT: s_mul_i32 s3, s3, s2
+; GCN-NEXT: s_mul_i32 s6, s6, s2
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_add_i32 s2, s0, s3
+; GCN-NEXT: s_add_i32 s6, s1, s6
+; GCN-NEXT: s_add_i32 s3, s2, s6
+; GCN-NEXT: s_cmp_lg_u32 s3, 8
+; GCN-NEXT: s_cbranch_scc1 .LBB4_1
+; GCN-NEXT: ; %bb.2: ; %bb18
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s3
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
+;
+; GFX8-LABEL: extra_and:
+; GFX8: ; %bb.0: ; %bb
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; GFX8-NEXT: s_mov_b32 s2, 0
+; GFX8-NEXT: s_mov_b32 s6, 0
+; GFX8-NEXT: .LBB4_1: ; %bb4
+; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX8-NEXT: s_and_b32 s3, s6, 0xffffff
+; GFX8-NEXT: s_and_b32 s6, s6, 0xffffff
+; GFX8-NEXT: s_and_b32 s2, s2, 0xffffff
+; GFX8-NEXT: s_mul_i32 s3, s3, s2
+; GFX8-NEXT: s_mul_i32 s6, s6, s2
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_add_i32 s2, s0, s3
+; GFX8-NEXT: s_add_i32 s6, s1, s6
+; GFX8-NEXT: s_add_i32 s3, s2, s6
+; GFX8-NEXT: s_cmp_lg_u32 s3, 8
+; GFX8-NEXT: s_cbranch_scc1 .LBB4_1
+; GFX8-NEXT: ; %bb.2: ; %bb18
+; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 s7, 0xf000
+; GFX8-NEXT: s_mov_b32 s6, -1
+; GFX8-NEXT: v_mov_b32_e32 v0, s3
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX8-NEXT: s_endpgm
bb:
br label %bb4
@@ -119,13 +503,139 @@ bb18: ; preds = %bb4
ret void
}
-; FUNC-LABEL: {{^}}dont_remove_shift
-; SI: s_lshr
-; SI: s_mul_i32
-; SI: s_mul_i32
-; SI: s_add_i32
-; SI: s_add_i32
define amdgpu_kernel void @dont_remove_shift(ptr addrspace(1) %arg, i32 %arg2, i32 %arg3) {
+; EG-LABEL: dont_remove_shift:
+; EG: ; %bb.0: ; %bb
+; EG-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: LOOP_START_DX10 @7
+; EG-NEXT: ALU_PUSH_BEFORE 12, @16, KC0[], KC1[]
+; EG-NEXT: JUMP @6 POP:1
+; EG-NEXT: LOOP_BREAK @6
+; EG-NEXT: POP @6 POP:1
+; EG-NEXT: END_LOOP @2
+; EG-NEXT: ALU 1, @29, KC0[], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T1.W, literal.x,
+; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00)
+; EG-NEXT: MOV * T3.W, PV.W,
+; EG-NEXT: MOV T0.Z, KC0[2].Y,
+; EG-NEXT: MOV T0.W, KC0[2].Z,
+; EG-NEXT: MOV * T2.W, KC0[2].W,
+; EG-NEXT: ALU clause starting at 16:
+; EG-NEXT: LSHR T1.W, T1.W, literal.x,
+; EG-NEXT: LSHR * T4.W, T3.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T3.W, T3.W, literal.x,
+; EG-NEXT: MULLO_INT * T0.X, PS, PV.W,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: MULLO_INT * T0.Y, PV.W, T1.W,
+; EG-NEXT: ADD_INT T3.W, T2.W, PS,
+; EG-NEXT: ADD_INT * T1.W, T0.W, T0.X,
+; EG-NEXT: ADD_INT * T0.X, PS, PV.W,
+; EG-NEXT: SETNE_INT * T4.W, PV.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+; EG-NEXT: ALU clause starting at 29:
+; EG-NEXT: LSHR * T1.X, T0.Z, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: dont_remove_shift:
+; CM: ; %bb.0: ; %bb
+; CM-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: LOOP_START_DX10 @7
+; CM-NEXT: ALU_PUSH_BEFORE 17, @16, KC0[], KC1[]
+; CM-NEXT: JUMP @6 POP:1
+; CM-NEXT: LOOP_BREAK @6
+; CM-NEXT: POP @6 POP:1
+; CM-NEXT: END_LOOP @2
+; CM-NEXT: ALU 1, @34, KC0[], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: MOV * T0.W, literal.x,
+; CM-NEXT: 0(0.000000e+00), 0(0.000000e+00)
+; CM-NEXT: MOV * T1.Z, PV.W,
+; CM-NEXT: MOV T0.Y, KC0[2].Y,
+; CM-NEXT: MOV T0.Z, KC0[2].Z,
+; CM-NEXT: MOV * T1.W, KC0[2].W,
+; CM-NEXT: ALU clause starting at 16:
+; CM-NEXT: LSHR T1.Y, T1.Z, literal.x,
+; CM-NEXT: LSHR T2.Z, T0.W, literal.x,
+; CM-NEXT: LSHR * T0.W, T1.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X, T0.W, T2.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T2.Z,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T2.Z,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T2.Z,
+; CM-NEXT: MULLO_INT T0.X (MASKED), T1.Y, T2.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T1.Y, T2.Z,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T1.Y, T2.Z,
+; CM-NEXT: MULLO_INT * T0.W, T1.Y, T2.Z,
+; CM-NEXT: ADD_INT T1.Z, T1.W, PV.W,
+; CM-NEXT: ADD_INT * T0.W, T0.Z, T0.X,
+; CM-NEXT: ADD_INT * T0.X, PV.W, PV.Z,
+; CM-NEXT: SETNE_INT * T2.W, PV.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+; CM-NEXT: ALU clause starting at 34:
+; CM-NEXT: LSHR * T1.X, T0.Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: dont_remove_shift:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
+; GCN-NEXT: s_mov_b32 s2, 0
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: .LBB5_1: ; %bb4
+; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT: s_lshr_b32 s3, s6, 8
+; GCN-NEXT: s_lshr_b32 s6, s6, 8
+; GCN-NEXT: s_lshr_b32 s2, s2, 8
+; GCN-NEXT: s_mul_i32 s3, s3, s2
+; GCN-NEXT: s_mul_i32 s6, s6, s2
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_add_i32 s2, s0, s3
+; GCN-NEXT: s_add_i32 s6, s1, s6
+; GCN-NEXT: s_add_i32 s3, s2, s6
+; GCN-NEXT: s_cmp_lg_u32 s3, 8
+; GCN-NEXT: s_cbranch_scc1 .LBB5_1
+; GCN-NEXT: ; %bb.2: ; %bb18
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s3
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
+;
+; GFX8-LABEL: dont_remove_shift:
+; GFX8: ; %bb.0: ; %bb
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; GFX8-NEXT: s_mov_b32 s2, 0
+; GFX8-NEXT: s_mov_b32 s6, 0
+; GFX8-NEXT: .LBB5_1: ; %bb4
+; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX8-NEXT: s_lshr_b32 s3, s6, 8
+; GFX8-NEXT: s_lshr_b32 s6, s6, 8
+; GFX8-NEXT: s_lshr_b32 s2, s2, 8
+; GFX8-NEXT: s_mul_i32 s3, s3, s2
+; GFX8-NEXT: s_mul_i32 s6, s6, s2
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_add_i32 s2, s0, s3
+; GFX8-NEXT: s_add_i32 s6, s1, s6
+; GFX8-NEXT: s_add_i32 s3, s2, s6
+; GFX8-NEXT: s_cmp_lg_u32 s3, 8
+; GFX8-NEXT: s_cbranch_scc1 .LBB5_1
+; GFX8-NEXT: ; %bb.2: ; %bb18
+; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 s7, 0xf000
+; GFX8-NEXT: s_mov_b32 s6, -1
+; GFX8-NEXT: v_mov_b32_e32 v0, s3
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX8-NEXT: s_endpgm
bb:
br label %bb4
@@ -151,19 +661,234 @@ bb18: ; preds = %bb4
ret void
}
-; FUNC-LABEL: {{^}}i8_mad_sat_16:
-; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 8
-; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
-; SI: v_med3_i32 v{{[0-9]}}, [[EXT]],
-; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; VI: v_max_i16_e32 [[MAX:v[0-9]]], 0xff80, [[MAD]]
-; VI: v_min_i16_e32 {{v[0-9]}}, 0x7f, [[MAX]]
define amdgpu_kernel void @i8_mad_sat_16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(5) %idx) {
+; EG-LABEL: i8_mad_sat_16:
+; EG: ; %bb.0: ; %entry
+; EG-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @8
+; EG-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @10
+; EG-NEXT: ALU 24, @21, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1
+; EG-NEXT: Fetch clause starting at 10:
+; EG-NEXT: VTX_READ_8 T3.X, T3.X, 0, #1
+; EG-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1
+; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: LSHR * T0.W, KC0[3].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOVA_INT * AR.x (MASKED), PV.W,
+; EG-NEXT: MOV * T0.X, T(0 + AR.x).X+,
+; EG-NEXT: ADD_INT * T1.X, KC0[2].W, PV.X,
+; EG-NEXT: ALU clause starting at 19:
+; EG-NEXT: ADD_INT T2.X, KC0[2].Z, T0.X,
+; EG-NEXT: ADD_INT * T3.X, KC0[3].X, T0.X,
+; EG-NEXT: ALU clause starting at 21:
+; EG-NEXT: BFE_INT T0.Z, T1.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T1.W, T3.X, 0.0, literal.x,
+; EG-NEXT: MULLO_INT * T0.Y, PV.Z, PV.W,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: ADD_INT * T0.W, PS, PV.W,
+; EG-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: MAX_INT T0.W, PV.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, T0.X,
+; EG-NEXT: -128(nan), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PS, literal.x,
+; EG-NEXT: MIN_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 3(4.203895e-45), 127(1.779649e-43)
+; EG-NEXT: AND_INT T0.W, PS, literal.x,
+; EG-NEXT: LSHL * T2.W, PV.W, literal.y,
+; EG-NEXT: 255(3.573311e-43), 3(4.203895e-45)
+; EG-NEXT: LSHL T0.X, PV.W, PS,
+; EG-NEXT: LSHL * T0.W, literal.x, PS,
+; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
+; EG-NEXT: MOV T0.Y, 0.0,
+; EG-NEXT: MOV * T0.Z, 0.0,
+; EG-NEXT: LSHR * T1.X, T1.W, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: i8_mad_sat_16:
+; CM: ; %bb.0: ; %entry
+; CM-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 0 @8
+; CM-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 1 @10
+; CM-NEXT: ALU 26, @21, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT MSKOR T1.XW, T0.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 8:
+; CM-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1
+; CM-NEXT: Fetch clause starting at 10:
+; CM-NEXT: VTX_READ_8 T3.X, T3.X, 0, #1
+; CM-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1
+; CM-NEXT: ALU clause starting at 14:
+; CM-NEXT: LSHR * T0.W, KC0[3].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOVA_INT * AR.x (MASKED), PV.W,
+; CM-NEXT: MOV * T0.X, T(0 + AR.x).X+,
+; CM-NEXT: ADD_INT * T1.X, KC0[3].X, PV.X,
+; CM-NEXT: ALU clause starting at 19:
+; CM-NEXT: ADD_INT * T2.X, KC0[2].W, T0.X,
+; CM-NEXT: ADD_INT * T3.X, KC0[2].Z, T0.X,
+; CM-NEXT: ALU clause starting at 21:
+; CM-NEXT: BFE_INT T0.Y, T1.X, 0.0, literal.x,
+; CM-NEXT: BFE_INT T0.Z, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: BFE_INT * T0.W, T3.X, 0.0, literal.x, BS:VEC_201
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X (MASKED), T0.Z, T0.W,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.Z, T0.W,
+; CM-NEXT: MULLO_INT T0.Z, T0.Z, T0.W,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.Z, T0.W,
+; CM-NEXT: ADD_INT * T0.W, PV.Z, T0.Y,
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: MAX_INT T0.Z, PV.W, literal.x,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.X,
+; CM-NEXT: -128(nan), 0(0.000000e+00)
+; CM-NEXT: AND_INT T1.Z, PV.W, literal.x,
+; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.y,
+; CM-NEXT: 3(4.203895e-45), 127(1.779649e-43)
+; CM-NEXT: AND_INT T0.Z, PV.W, literal.x,
+; CM-NEXT: LSHL * T1.W, PV.Z, literal.y,
+; CM-NEXT: 255(3.573311e-43), 3(4.203895e-45)
+; CM-NEXT: LSHL T1.X, PV.Z, PV.W,
+; CM-NEXT: LSHL * T1.W, literal.x, PV.W,
+; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
+; CM-NEXT: MOV T1.Y, 0.0,
+; CM-NEXT: MOV * T1.Z, 0.0,
+; CM-NEXT: LSHR * T0.X, T0.W, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: i8_mad_sat_16:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GCN-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GCN-NEXT: s_mov_b32 s22, -1
+; GCN-NEXT: s_mov_b32 s23, 0xe8f000
+; GCN-NEXT: s_add_u32 s20, s20, s11
+; GCN-NEXT: s_addc_u32 s21, s21, 0
+; GCN-NEXT: s_load_dword s8, s[4:5], 0x11
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_add_i32 s9, s8, 4
+; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; GCN-NEXT: v_mov_b32_e32 v0, s8
+; GCN-NEXT: v_mov_b32_e32 v1, s9
+; GCN-NEXT: buffer_load_dword v1, v1, s[20:23], 0 offen
+; GCN-NEXT: buffer_load_dword v0, v0, s[20:23], 0 offen
+; GCN-NEXT: s_mov_b32 s11, 0xf000
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GCN-NEXT: s_mov_b64 s[18:19], s[10:11]
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GCN-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GCN-NEXT: s_mov_b64 s[16:17], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_load_sbyte v2, v[0:1], s[12:15], 0 addr64
+; GCN-NEXT: buffer_load_sbyte v3, v[0:1], s[8:11], 0 addr64
+; GCN-NEXT: buffer_load_sbyte v4, v[0:1], s[16:19], 0 addr64
+; GCN-NEXT: s_movk_i32 s2, 0xff80
+; GCN-NEXT: s_waitcnt vmcnt(2)
+; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GCN-NEXT: s_waitcnt vmcnt(1)
+; GCN-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mad_u32_u24 v2, v2, v3, v4
+; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16
+; GCN-NEXT: v_mov_b32_e32 v3, 0x7f
+; GCN-NEXT: v_med3_i32 v2, v2, s2, v3
+; GCN-NEXT: s_mov_b64 s[2:3], s[10:11]
+; GCN-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
+;
+; SI-LABEL: i8_mad_sat_16:
+; SI: ; %bb.0: ; %entry
+; SI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
+; SI-NEXT: s_load_dword s0, s[4:5], 0x44
+; SI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1
+; SI-NEXT: s_mov_b32 s90, -1
+; SI-NEXT: s_mov_b32 s91, 0xe80000
+; SI-NEXT: s_add_u32 s88, s88, s11
+; SI-NEXT: s_addc_u32 s89, s89, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_add_i32 s1, s0, 4
+; SI-NEXT: v_mov_b32_e32 v0, s0
+; SI-NEXT: buffer_load_dword v6, v0, s[88:91], 0 offen
+; SI-NEXT: v_mov_b32_e32 v0, s1
+; SI-NEXT: buffer_load_dword v7, v0, s[88:91], 0 offen
+; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v1, s3
+; SI-NEXT: v_mov_b32_e32 v3, s5
+; SI-NEXT: v_mov_b32_e32 v5, s7
+; SI-NEXT: s_waitcnt vmcnt(1)
+; SI-NEXT: v_add_u32_e32 v0, vcc, s2, v6
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc
+; SI-NEXT: v_add_u32_e32 v2, vcc, s4, v6
+; SI-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; SI-NEXT: v_add_u32_e32 v4, vcc, s6, v6
+; SI-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc
+; SI-NEXT: flat_load_sbyte v0, v[0:1]
+; SI-NEXT: flat_load_sbyte v1, v[2:3]
+; SI-NEXT: flat_load_sbyte v2, v[4:5]
+; SI-NEXT: v_mov_b32_e32 v3, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_mad_u16 v0, v1, v0, v2
+; SI-NEXT: v_max_i16_e32 v0, 0xff80, v0
+; SI-NEXT: v_min_i16_e32 v2, 0x7f, v0
+; SI-NEXT: v_add_u32_e32 v0, vcc, s0, v6
+; SI-NEXT: v_addc_u32_e32 v1, vcc, v3, v7, vcc
+; SI-NEXT: flat_store_byte v[0:1], v2
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: i8_mad_sat_16:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_load_dword s0, s[4:5], 0x44
+; VI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s14, -1
+; VI-NEXT: s_mov_b32 s15, 0xe80000
+; VI-NEXT: s_add_u32 s12, s12, s11
+; VI-NEXT: s_addc_u32 s13, s13, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_i32 s1, s0, 4
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: buffer_load_dword v6, v0, s[12:15], 0 offen
+; VI-NEXT: v_mov_b32_e32 v0, s1
+; VI-NEXT: buffer_load_dword v7, v0, s[12:15], 0 offen
+; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v1, s3
+; VI-NEXT: v_mov_b32_e32 v3, s5
+; VI-NEXT: v_mov_b32_e32 v5, s7
+; VI-NEXT: s_waitcnt vmcnt(1)
+; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v6
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc
+; VI-NEXT: v_add_u32_e32 v2, vcc, s4, v6
+; VI-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; VI-NEXT: v_add_u32_e32 v4, vcc, s6, v6
+; VI-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc
+; VI-NEXT: flat_load_sbyte v0, v[0:1]
+; VI-NEXT: flat_load_sbyte v1, v[2:3]
+; VI-NEXT: flat_load_sbyte v2, v[4:5]
+; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_mad_u16 v0, v1, v0, v2
+; VI-NEXT: v_max_i16_e32 v0, 0xff80, v0
+; VI-NEXT: v_min_i16_e32 v2, 0x7f, v0
+; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v6
+; VI-NEXT: v_addc_u32_e32 v1, vcc, v3, v7, vcc
+; VI-NEXT: flat_store_byte v[0:1], v2
+; VI-NEXT: s_endpgm
entry:
%retval.0.i = load i64, ptr addrspace(5) %idx
%arrayidx = getelementptr inbounds i8, ptr addrspace(1) %in0, i64 %retval.0.i
@@ -187,16 +912,201 @@ entry:
ret void
}
-; FUNC-LABEL: {{^}}i8_mad_32:
-; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 8
-; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
define amdgpu_kernel void @i8_mad_32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(5) %idx) {
+; EG-LABEL: i8_mad_32:
+; EG: ; %bb.0: ; %entry
+; EG-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @8
+; EG-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @10
+; EG-NEXT: ALU 9, @21, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1
+; EG-NEXT: Fetch clause starting at 10:
+; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
+; EG-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1
+; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: LSHR * T0.W, KC0[3].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOVA_INT * AR.x (MASKED), PV.W,
+; EG-NEXT: MOV * T0.X, T(0 + AR.x).X+,
+; EG-NEXT: ADD_INT * T1.X, KC0[2].W, PV.X,
+; EG-NEXT: ALU clause starting at 19:
+; EG-NEXT: ADD_INT T2.X, KC0[2].Z, T0.X,
+; EG-NEXT: ADD_INT * T0.X, KC0[3].X, T0.X,
+; EG-NEXT: ALU clause starting at 21:
+; EG-NEXT: BFE_INT T0.Z, T1.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T1.W, T0.X, 0.0, literal.x,
+; EG-NEXT: MULLO_INT * T0.X, PV.W, PV.Z,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: ADD_INT * T0.W, PS, PV.W,
+; EG-NEXT: BFE_INT T0.X, PV.W, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
+;
+; CM-LABEL: i8_mad_32:
+; CM: ; %bb.0: ; %entry
+; CM-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 0 @8
+; CM-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 1 @10
+; CM-NEXT: ALU 12, @21, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 8:
+; CM-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1
+; CM-NEXT: Fetch clause starting at 10:
+; CM-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
+; CM-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1
+; CM-NEXT: ALU clause starting at 14:
+; CM-NEXT: LSHR * T0.W, KC0[3].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOVA_INT * AR.x (MASKED), PV.W,
+; CM-NEXT: MOV * T0.X, T(0 + AR.x).X+,
+; CM-NEXT: ADD_INT * T1.X, KC0[3].X, PV.X,
+; CM-NEXT: ALU clause starting at 19:
+; CM-NEXT: ADD_INT * T2.X, KC0[2].W, T0.X,
+; CM-NEXT: ADD_INT * T0.X, KC0[2].Z, T0.X,
+; CM-NEXT: ALU clause starting at 21:
+; CM-NEXT: BFE_INT T0.Y, T1.X, 0.0, literal.x,
+; CM-NEXT: BFE_INT T0.Z, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x, BS:VEC_201
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X, T0.W, T0.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T0.Z,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T0.Z,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T0.Z,
+; CM-NEXT: ADD_INT * T0.W, PV.X, T0.Y,
+; CM-NEXT: BFE_INT * T0.X, PV.W, 0.0, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: i8_mad_32:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
+; GCN-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
+; GCN-NEXT: s_mov_b32 s26, -1
+; GCN-NEXT: s_mov_b32 s27, 0xe8f000
+; GCN-NEXT: s_add_u32 s24, s24, s11
+; GCN-NEXT: s_addc_u32 s25, s25, 0
+; GCN-NEXT: s_load_dword s8, s[4:5], 0x11
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_add_i32 s9, s8, 4
+; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; GCN-NEXT: v_mov_b32_e32 v0, s8
+; GCN-NEXT: v_mov_b32_e32 v1, s9
+; GCN-NEXT: buffer_load_dword v1, v1, s[24:27], 0 offen
+; GCN-NEXT: buffer_load_dword v0, v0, s[24:27], 0 offen
+; GCN-NEXT: s_mov_b32 s11, 0xf000
+; GCN-NEXT: s_mov_b32 s14, 0
+; GCN-NEXT: s_mov_b32 s15, s11
+; GCN-NEXT: s_mov_b64 s[18:19], s[14:15]
+; GCN-NEXT: s_mov_b64 s[22:23], s[14:15]
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GCN-NEXT: s_mov_b64 s[16:17], s[4:5]
+; GCN-NEXT: s_mov_b64 s[20:21], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_load_sbyte v2, v[0:1], s[12:15], 0 addr64
+; GCN-NEXT: buffer_load_sbyte v3, v[0:1], s[16:19], 0 addr64
+; GCN-NEXT: buffer_load_sbyte v0, v[0:1], s[20:23], 0 addr64
+; GCN-NEXT: s_mov_b32 s10, -1
+; GCN-NEXT: s_mov_b32 s8, s0
+; GCN-NEXT: s_mov_b32 s9, s1
+; GCN-NEXT: s_waitcnt vmcnt(2)
+; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v2
+; GCN-NEXT: s_waitcnt vmcnt(1)
+; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mad_u32_u24 v0, v1, v2, v0
+; GCN-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GCN-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; GCN-NEXT: s_endpgm
+;
+; SI-LABEL: i8_mad_32:
+; SI: ; %bb.0: ; %entry
+; SI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
+; SI-NEXT: s_load_dword s0, s[4:5], 0x44
+; SI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1
+; SI-NEXT: s_mov_b32 s90, -1
+; SI-NEXT: s_mov_b32 s91, 0xe80000
+; SI-NEXT: s_add_u32 s88, s88, s11
+; SI-NEXT: s_addc_u32 s89, s89, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_add_i32 s1, s0, 4
+; SI-NEXT: v_mov_b32_e32 v0, s0
+; SI-NEXT: buffer_load_dword v4, v0, s[88:91], 0 offen
+; SI-NEXT: v_mov_b32_e32 v0, s1
+; SI-NEXT: buffer_load_dword v5, v0, s[88:91], 0 offen
+; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v1, s3
+; SI-NEXT: v_mov_b32_e32 v3, s5
+; SI-NEXT: v_mov_b32_e32 v6, s7
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_waitcnt vmcnt(1)
+; SI-NEXT: v_add_u32_e32 v0, vcc, s2, v4
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
+; SI-NEXT: v_add_u32_e32 v2, vcc, s4, v4
+; SI-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; SI-NEXT: v_add_u32_e32 v4, vcc, s6, v4
+; SI-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc
+; SI-NEXT: flat_load_sbyte v0, v[0:1]
+; SI-NEXT: flat_load_sbyte v1, v[2:3]
+; SI-NEXT: flat_load_sbyte v2, v[4:5]
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_mad_u16 v0, v0, v1, v2
+; SI-NEXT: v_bfe_i32 v0, v0, 0, 16
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: i8_mad_32:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_load_dword s0, s[4:5], 0x44
+; VI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s14, -1
+; VI-NEXT: s_mov_b32 s15, 0xe80000
+; VI-NEXT: s_add_u32 s12, s12, s11
+; VI-NEXT: s_addc_u32 s13, s13, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_i32 s1, s0, 4
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: buffer_load_dword v4, v0, s[12:15], 0 offen
+; VI-NEXT: v_mov_b32_e32 v0, s1
+; VI-NEXT: buffer_load_dword v5, v0, s[12:15], 0 offen
+; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v1, s3
+; VI-NEXT: v_mov_b32_e32 v3, s5
+; VI-NEXT: v_mov_b32_e32 v6, s7
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_waitcnt vmcnt(1)
+; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v4
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
+; VI-NEXT: v_add_u32_e32 v2, vcc, s4, v4
+; VI-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; VI-NEXT: v_add_u32_e32 v4, vcc, s6, v4
+; VI-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc
+; VI-NEXT: flat_load_sbyte v0, v[0:1]
+; VI-NEXT: flat_load_sbyte v1, v[2:3]
+; VI-NEXT: flat_load_sbyte v2, v[4:5]
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_mad_u16 v0, v0, v1, v2
+; VI-NEXT: v_bfe_i32 v0, v0, 0, 16
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
entry:
%retval.0.i = load i64, ptr addrspace(5) %idx
%arrayidx = getelementptr inbounds i8, ptr addrspace(1) %a, i64 %retval.0.i
@@ -215,16 +1125,207 @@ entry:
ret void
}
-; FUNC-LABEL: {{^}}i8_mad_64:
-; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 8
-; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
define amdgpu_kernel void @i8_mad_64(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(5) %idx) {
+; EG-LABEL: i8_mad_64:
+; EG: ; %bb.0: ; %entry
+; EG-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @8
+; EG-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @10
+; EG-NEXT: ALU 11, @21, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1
+; EG-NEXT: Fetch clause starting at 10:
+; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
+; EG-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1
+; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: LSHR * T0.W, KC0[3].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOVA_INT * AR.x (MASKED), PV.W,
+; EG-NEXT: MOV * T0.X, T(0 + AR.x).X+,
+; EG-NEXT: ADD_INT * T1.X, KC0[2].W, PV.X,
+; EG-NEXT: ALU clause starting at 19:
+; EG-NEXT: ADD_INT T2.X, KC0[2].Z, T0.X,
+; EG-NEXT: ADD_INT * T0.X, KC0[3].X, T0.X,
+; EG-NEXT: ALU clause starting at 21:
+; EG-NEXT: BFE_INT T0.Z, T1.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T1.W, T0.X, 0.0, literal.x,
+; EG-NEXT: MULLO_INT * T0.X, PV.W, PV.Z,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: ADD_INT * T0.W, PS, PV.W,
+; EG-NEXT: BFE_INT T0.X, PV.W, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
+; EG-NEXT: ASHR * T0.Y, PV.X, literal.x,
+; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
+;
+; CM-LABEL: i8_mad_64:
+; CM: ; %bb.0: ; %entry
+; CM-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 0 @8
+; CM-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 1 @10
+; CM-NEXT: ALU 13, @21, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 8:
+; CM-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1
+; CM-NEXT: Fetch clause starting at 10:
+; CM-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
+; CM-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1
+; CM-NEXT: ALU clause starting at 14:
+; CM-NEXT: LSHR * T0.W, KC0[3].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOVA_INT * AR.x (MASKED), PV.W,
+; CM-NEXT: MOV * T0.X, T(0 + AR.x).X+,
+; CM-NEXT: ADD_INT * T1.X, KC0[3].X, PV.X,
+; CM-NEXT: ALU clause starting at 19:
+; CM-NEXT: ADD_INT * T2.X, KC0[2].W, T0.X,
+; CM-NEXT: ADD_INT * T0.X, KC0[2].Z, T0.X,
+; CM-NEXT: ALU clause starting at 21:
+; CM-NEXT: BFE_INT T0.Y, T1.X, 0.0, literal.x,
+; CM-NEXT: BFE_INT T0.Z, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x, BS:VEC_201
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X, T0.W, T0.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T0.Z,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T0.Z,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T0.Z,
+; CM-NEXT: ADD_INT * T0.W, PV.X, T0.Y,
+; CM-NEXT: BFE_INT * T0.X, PV.W, 0.0, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: ASHR * T0.Y, PV.X, literal.y,
+; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44)
+;
+; GCN-LABEL: i8_mad_64:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
+; GCN-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
+; GCN-NEXT: s_mov_b32 s26, -1
+; GCN-NEXT: s_mov_b32 s27, 0xe8f000
+; GCN-NEXT: s_add_u32 s24, s24, s11
+; GCN-NEXT: s_addc_u32 s25, s25, 0
+; GCN-NEXT: s_load_dword s8, s[4:5], 0x11
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_add_i32 s9, s8, 4
+; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; GCN-NEXT: v_mov_b32_e32 v0, s8
+; GCN-NEXT: v_mov_b32_e32 v1, s9
+; GCN-NEXT: buffer_load_dword v1, v1, s[24:27], 0 offen
+; GCN-NEXT: buffer_load_dword v0, v0, s[24:27], 0 offen
+; GCN-NEXT: s_mov_b32 s11, 0xf000
+; GCN-NEXT: s_mov_b32 s14, 0
+; GCN-NEXT: s_mov_b32 s15, s11
+; GCN-NEXT: s_mov_b64 s[18:19], s[14:15]
+; GCN-NEXT: s_mov_b64 s[22:23], s[14:15]
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GCN-NEXT: s_mov_b64 s[16:17], s[4:5]
+; GCN-NEXT: s_mov_b64 s[20:21], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_load_sbyte v2, v[0:1], s[12:15], 0 addr64
+; GCN-NEXT: buffer_load_sbyte v3, v[0:1], s[16:19], 0 addr64
+; GCN-NEXT: buffer_load_sbyte v0, v[0:1], s[20:23], 0 addr64
+; GCN-NEXT: s_mov_b32 s10, -1
+; GCN-NEXT: s_mov_b32 s8, s0
+; GCN-NEXT: s_mov_b32 s9, s1
+; GCN-NEXT: s_waitcnt vmcnt(2)
+; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v2
+; GCN-NEXT: s_waitcnt vmcnt(1)
+; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mad_u32_u24 v0, v1, v2, v0
+; GCN-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0
+; GCN-NEXT: s_endpgm
+;
+; SI-LABEL: i8_mad_64:
+; SI: ; %bb.0: ; %entry
+; SI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
+; SI-NEXT: s_load_dword s0, s[4:5], 0x44
+; SI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1
+; SI-NEXT: s_mov_b32 s90, -1
+; SI-NEXT: s_mov_b32 s91, 0xe80000
+; SI-NEXT: s_add_u32 s88, s88, s11
+; SI-NEXT: s_addc_u32 s89, s89, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_add_i32 s1, s0, 4
+; SI-NEXT: v_mov_b32_e32 v0, s0
+; SI-NEXT: buffer_load_dword v4, v0, s[88:91], 0 offen
+; SI-NEXT: v_mov_b32_e32 v0, s1
+; SI-NEXT: buffer_load_dword v5, v0, s[88:91], 0 offen
+; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v1, s3
+; SI-NEXT: v_mov_b32_e32 v3, s5
+; SI-NEXT: v_mov_b32_e32 v6, s7
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_waitcnt vmcnt(1)
+; SI-NEXT: v_add_u32_e32 v0, vcc, s2, v4
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
+; SI-NEXT: v_add_u32_e32 v2, vcc, s4, v4
+; SI-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; SI-NEXT: v_add_u32_e32 v4, vcc, s6, v4
+; SI-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc
+; SI-NEXT: flat_load_sbyte v0, v[0:1]
+; SI-NEXT: flat_load_sbyte v1, v[2:3]
+; SI-NEXT: flat_load_sbyte v2, v[4:5]
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_mad_u16 v0, v0, v1, v2
+; SI-NEXT: v_bfe_i32 v0, v0, 0, 16
+; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: i8_mad_64:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_load_dword s0, s[4:5], 0x44
+; VI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s14, -1
+; VI-NEXT: s_mov_b32 s15, 0xe80000
+; VI-NEXT: s_add_u32 s12, s12, s11
+; VI-NEXT: s_addc_u32 s13, s13, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_i32 s1, s0, 4
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: buffer_load_dword v4, v0, s[12:15], 0 offen
+; VI-NEXT: v_mov_b32_e32 v0, s1
+; VI-NEXT: buffer_load_dword v5, v0, s[12:15], 0 offen
+; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v1, s3
+; VI-NEXT: v_mov_b32_e32 v3, s5
+; VI-NEXT: v_mov_b32_e32 v6, s7
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_waitcnt vmcnt(1)
+; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v4
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
+; VI-NEXT: v_add_u32_e32 v2, vcc, s4, v4
+; VI-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; VI-NEXT: v_add_u32_e32 v4, vcc, s6, v4
+; VI-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc
+; VI-NEXT: flat_load_sbyte v0, v[0:1]
+; VI-NEXT: flat_load_sbyte v1, v[2:3]
+; VI-NEXT: flat_load_sbyte v2, v[4:5]
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_mad_u16 v0, v0, v1, v2
+; VI-NEXT: v_bfe_i32 v0, v0, 0, 16
+; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; VI-NEXT: s_endpgm
entry:
%retval.0.i = load i64, ptr addrspace(5) %idx
%arrayidx = getelementptr inbounds i8, ptr addrspace(1) %a, i64 %retval.0.i
@@ -248,17 +1349,236 @@ entry:
; had a chance to form mul24. The mul combine would then see
; extractelement with no known bits and fail. All of the mul/add
; combos in this loop should form v_mad_u32_u24.
-
-; FUNC-LABEL: {{^}}mad24_known_bits_destroyed:
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
define void @mad24_known_bits_destroyed(i32 %arg, <4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3, i32 %arg4, i32 %arg5, i32 %arg6, ptr addrspace(1) %arg7, ptr addrspace(1) %arg8) #0 {
+; EG-LABEL: mad24_known_bits_destroyed:
+; EG: ; %bb.0: ; %bb
+; EG-NEXT: ALU 21, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT: LOOP_START_DX10 @11
+; EG-NEXT: ALU 8, @34, KC0[], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T2.X, 0
+; EG-NEXT: ALU 14, @43, KC0[], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 0
+; EG-NEXT: ALU_PUSH_BEFORE 3, @58, KC0[], KC1[]
+; EG-NEXT: JUMP @10 POP:1
+; EG-NEXT: LOOP_BREAK @10
+; EG-NEXT: POP @10 POP:1
+; EG-NEXT: END_LOOP @2
+; EG-NEXT: CF_END
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: MOV * T0.W, KC0[5].X,
+; EG-NEXT: MOV * T0.Z, KC0[4].W,
+; EG-NEXT: MOV * T0.Y, KC0[4].Z,
+; EG-NEXT: MOV T0.X, KC0[2].Y,
+; EG-NEXT: AND_INT * T1.Y, KC0[4].X, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.Z, KC0[3].W, literal.x,
+; EG-NEXT: AND_INT T1.W, KC0[3].Z, literal.x,
+; EG-NEXT: MOV * T2.W, KC0[7].Y,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: LSHR T1.X, PS, literal.x,
+; EG-NEXT: AND_INT T2.Y, KC0[6].Y, literal.y,
+; EG-NEXT: MOV T2.Z, KC0[6].X,
+; EG-NEXT: MOV * T2.W, KC0[5].W,
+; EG-NEXT: 2(2.802597e-45), 16777215(2.350989e-38)
+; EG-NEXT: MOV * T3.W, KC0[7].X,
+; EG-NEXT: LSHR T2.X, PV.W, literal.x,
+; EG-NEXT: MOV T3.Y, KC0[5].Z,
+; EG-NEXT: MOV T3.Z, KC0[6].Z,
+; EG-NEXT: MOV * T3.W, KC0[6].W,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV * T4.W, KC0[4].Y,
+; EG-NEXT: ALU clause starting at 34:
+; EG-NEXT: MULLO_INT * T0.X, T0.X, T2.Y,
+; EG-NEXT: ADD_INT * T4.W, PS, T3.Z,
+; EG-NEXT: AND_INT * T4.W, PV.W, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: MULLO_INT * T0.X, PV.W, T2.Y,
+; EG-NEXT: MULLO_INT * T0.W, T0.W, T1.Y,
+; EG-NEXT: MULLO_INT * T0.Z, T0.Z, T1.Z,
+; EG-NEXT: MULLO_INT * T0.Y, T0.Y, T1.W,
+; EG-NEXT: ADD_INT * T0.X, T0.X, T3.Z,
+; EG-NEXT: ALU clause starting at 43:
+; EG-NEXT: ADD_INT * T4.W, T0.Y, T3.Y,
+; EG-NEXT: AND_INT T4.W, PV.W, literal.x,
+; EG-NEXT: ADD_INT * T5.W, T0.Z, T2.W,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: AND_INT T0.Z, PS, literal.x,
+; EG-NEXT: ADD_INT T0.W, T0.W, T2.Z,
+; EG-NEXT: MULLO_INT * T0.Y, PV.W, T1.W,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: ADD_INT T0.Y, PS, T3.Y,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: MULLO_INT * T0.Z, PV.Z, T1.Z,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: ADD_INT T0.Z, PS, T2.W,
+; EG-NEXT: MULLO_INT * T0.W, PV.W, T1.Y,
+; EG-NEXT: ADD_INT * T0.W, PS, T2.Z,
+; EG-NEXT: ALU clause starting at 58:
+; EG-NEXT: ADD_INT * T3.W, T3.W, literal.x,
+; EG-NEXT: -1(nan), 0(0.000000e+00)
+; EG-NEXT: SETE_INT * T4.W, PV.W, 0.0,
+; EG-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+;
+; CM-LABEL: mad24_known_bits_destroyed:
+; CM: ; %bb.0: ; %bb
+; CM-NEXT: ALU 22, @12, KC0[CB0:0-32], KC1[]
+; CM-NEXT: LOOP_START_DX10 @11
+; CM-NEXT: ALU 23, @35, KC0[], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T2.X
+; CM-NEXT: ALU 23, @59, KC0[], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
+; CM-NEXT: ALU_PUSH_BEFORE 3, @83, KC0[], KC1[]
+; CM-NEXT: JUMP @10 POP:1
+; CM-NEXT: LOOP_BREAK @10
+; CM-NEXT: POP @10 POP:1
+; CM-NEXT: END_LOOP @2
+; CM-NEXT: CF_END
+; CM-NEXT: ALU clause starting at 12:
+; CM-NEXT: MOV * T0.W, KC0[5].X,
+; CM-NEXT: MOV * T0.Z, KC0[4].W,
+; CM-NEXT: MOV * T0.Y, KC0[4].Z,
+; CM-NEXT: MOV T0.X, KC0[2].Y,
+; CM-NEXT: AND_INT * T1.Y, KC0[4].X, literal.x,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: AND_INT T1.Z, KC0[3].W, literal.x,
+; CM-NEXT: AND_INT * T1.W, KC0[3].Z, literal.x,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: AND_INT T2.Y, KC0[6].Y, literal.x,
+; CM-NEXT: MOV T2.Z, KC0[6].X,
+; CM-NEXT: MOV * T2.W, KC0[7].Y,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: LSHR T1.X, PV.W, literal.x,
+; CM-NEXT: MOV T3.Y, KC0[5].W,
+; CM-NEXT: MOV T3.Z, KC0[5].Z,
+; CM-NEXT: MOV * T2.W, KC0[7].X,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: LSHR T2.X, PV.W, literal.x,
+; CM-NEXT: MOV T4.Y, KC0[6].Z,
+; CM-NEXT: MOV T4.Z, KC0[6].W,
+; CM-NEXT: MOV * T2.W, KC0[4].Y,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: ALU clause starting at 35:
+; CM-NEXT: MULLO_INT T0.X, T0.X, T2.Y,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.X, T2.Y,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.X, T2.Y,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.X, T2.Y,
+; CM-NEXT: ADD_INT * T2.W, PV.X, T4.Y,
+; CM-NEXT: AND_INT * T2.W, PV.W, literal.x,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X, T2.W, T2.Y,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T2.W, T2.Y,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T2.W, T2.Y,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T2.W, T2.Y,
+; CM-NEXT: MULLO_INT T0.X (MASKED), T0.W, T1.Y,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T1.Y,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T1.Y,
+; CM-NEXT: MULLO_INT * T0.W, T0.W, T1.Y,
+; CM-NEXT: MULLO_INT T0.X (MASKED), T0.Z, T1.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.Z, T1.Z,
+; CM-NEXT: MULLO_INT T0.Z, T0.Z, T1.Z,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.Z, T1.Z,
+; CM-NEXT: MULLO_INT T0.X (MASKED), T0.Y, T1.W,
+; CM-NEXT: MULLO_INT T0.Y, T0.Y, T1.W,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.Y, T1.W,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.Y, T1.W,
+; CM-NEXT: ADD_INT * T0.X, T0.X, T4.Y,
+; CM-NEXT: ALU clause starting at 59:
+; CM-NEXT: ADD_INT * T2.W, T0.Y, T3.Z,
+; CM-NEXT: ADD_INT T0.Z, T0.Z, T3.Y,
+; CM-NEXT: AND_INT * T2.W, PV.W, literal.x,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X (MASKED), T2.W, T1.W,
+; CM-NEXT: MULLO_INT T0.Y, T2.W, T1.W,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T2.W, T1.W,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T2.W, T1.W,
+; CM-NEXT: ADD_INT T0.Y, PV.Y, T3.Z,
+; CM-NEXT: ADD_INT T5.Z, T0.W, T2.Z, BS:VEC_021/SCL_122
+; CM-NEXT: AND_INT * T0.W, T0.Z, literal.x,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X (MASKED), T0.W, T1.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T1.Z,
+; CM-NEXT: MULLO_INT T0.Z, T0.W, T1.Z,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T1.Z,
+; CM-NEXT: ADD_INT T0.Z, PV.Z, T3.Y,
+; CM-NEXT: AND_INT * T0.W, T5.Z, literal.x,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X (MASKED), T0.W, T1.Y,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T1.Y,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T1.Y,
+; CM-NEXT: MULLO_INT * T0.W, T0.W, T1.Y,
+; CM-NEXT: ADD_INT * T0.W, PV.W, T2.Z,
+; CM-NEXT: ALU clause starting at 83:
+; CM-NEXT: ADD_INT * T4.Z, T4.Z, literal.x,
+; CM-NEXT: -1(nan), 0(0.000000e+00)
+; CM-NEXT: SETE_INT * T2.W, PV.Z, 0.0,
+; CM-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+;
+; GCN-LABEL: mad24_known_bits_destroyed:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v5, v0
+; GCN-NEXT: v_and_b32_e32 v0, 0xffffff, v13
+; GCN-NEXT: v_and_b32_e32 v1, 0xffffff, v2
+; GCN-NEXT: v_and_b32_e32 v2, 0xffffff, v3
+; GCN-NEXT: v_and_b32_e32 v3, 0xffffff, v4
+; GCN-NEXT: s_mov_b64 s[8:9], 0
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s4, s6
+; GCN-NEXT: s_mov_b32 s5, s6
+; GCN-NEXT: .LBB9_1: ; %bb19
+; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT: v_mad_u32_u24 v4, v5, v0, v14
+; GCN-NEXT: s_waitcnt expcnt(0)
+; GCN-NEXT: v_mad_u32_u24 v6, v6, v1, v10
+; GCN-NEXT: v_mad_u32_u24 v7, v7, v2, v11
+; GCN-NEXT: v_mad_u32_u24 v8, v8, v3, v12
+; GCN-NEXT: v_add_i32_e32 v15, vcc, -1, v15
+; GCN-NEXT: v_mad_u32_u24 v5, v4, v0, v14
+; GCN-NEXT: v_mad_u32_u24 v6, v6, v1, v10
+; GCN-NEXT: v_mad_u32_u24 v7, v7, v2, v11
+; GCN-NEXT: v_mad_u32_u24 v8, v8, v3, v12
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15
+; GCN-NEXT: buffer_store_dword v5, v[16:17], s[4:7], 0 addr64
+; GCN-NEXT: s_or_b64 s[8:9], vcc, s[8:9]
+; GCN-NEXT: buffer_store_dwordx4 v[5:8], v[18:19], s[4:7], 0 addr64
+; GCN-NEXT: s_andn2_b64 exec, exec, s[8:9]
+; GCN-NEXT: s_cbranch_execnz .LBB9_1
+; GCN-NEXT: ; %bb.2: ; %bb18
+; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: mad24_known_bits_destroyed:
+; GFX8: ; %bb.0: ; %bb
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v5, v0
+; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v13
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v2
+; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v3
+; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v4
+; GFX8-NEXT: s_mov_b64 s[4:5], 0
+; GFX8-NEXT: .LBB9_1: ; %bb19
+; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX8-NEXT: v_add_u32_e32 v15, vcc, -1, v15
+; GFX8-NEXT: v_mad_u32_u24 v4, v5, v0, v14
+; GFX8-NEXT: v_mad_u32_u24 v6, v6, v1, v10
+; GFX8-NEXT: v_mad_u32_u24 v7, v7, v2, v11
+; GFX8-NEXT: v_mad_u32_u24 v8, v8, v3, v12
+; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15
+; GFX8-NEXT: v_mad_u32_u24 v5, v4, v0, v14
+; GFX8-NEXT: v_mad_u32_u24 v6, v6, v1, v10
+; GFX8-NEXT: v_mad_u32_u24 v7, v7, v2, v11
+; GFX8-NEXT: v_mad_u32_u24 v8, v8, v3, v12
+; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GFX8-NEXT: flat_store_dword v[16:17], v5
+; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[5:8]
+; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5]
+; GFX8-NEXT: s_cbranch_execnz .LBB9_1
+; GFX8-NEXT: ; %bb.2: ; %bb18
+; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
bb:
%tmp = and i32 %arg4, 16777215
%tmp9 = extractelement <4 x i32> %arg1, i64 1
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
index bb22144..9814ed8 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
@@ -1,15 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SAFE %s
-; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-SAFE %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9,GFX9-SAFE %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-TRUE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-FAKE16 %s
-
-; RUN: llc -mtriple=amdgcn -mcpu=hawaii -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=CI,CI-NSZ %s
-; RUN: llc -mtriple=amdgcn -mcpu=fiji -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=VI,VI-NSZ %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX9,GFX9-NSZ %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-TRUE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI %s
+; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
define <2 x half> @add_select_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
; CI-LABEL: add_select_fabs_fabs_v2f16:
@@ -63,69 +57,37 @@ define <2 x half> @add_select_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -198,73 +160,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_lhs_fabs_fabs_v2f16(<2 x
; GFX9-NEXT: v_pk_add_f16 v1, v1, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v2, v4
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v5
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v2, v4
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v5
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v2, v4
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v5
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v2, v4
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v5
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v2, v4
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v5
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v2, v4
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v5
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -328,73 +256,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_store_use_lhs_fabs_fabs_v2f1
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b32_e32 v1, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v1, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b32_e32 v1, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v1, v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, v2
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -469,73 +363,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_rhs_fabs_fabs_v2f16(<2 x
; GFX9-NEXT: v_pk_add_f16 v1, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v2, v5
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v3, v5
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v2, v5
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v3, v5
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v2, v5
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v3, v5
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -597,63 +457,34 @@ define <2 x half> @add_select_fabs_var_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_var_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_var_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_var_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_var_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_var_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_var_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> %fabs.x, <2 x half> %y
@@ -709,61 +540,33 @@ define <2 x half> @add_select_fabs_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> %fabs, <2 x half> <half -1.0, half -1.0>
@@ -815,61 +618,33 @@ define <2 x half> @add_select_fabs_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x)
; GFX9-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %select)
@@ -920,61 +695,33 @@ define <2 x half> @add_select_posk_posk_v2f16(<2 x i32> %c, <2 x half> %x) {
; GFX9-NEXT: v_pk_add_f16 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_posk_posk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3c00
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_posk_posk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4000
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_posk_posk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3c00
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_posk_posk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4000
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_posk_posk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3c00
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_posk_posk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%select = select <2 x i1> %cmp, <2 x half> <half 2.0, half 2.0>, <2 x half> <half 1.0, half 1.0>
%add = fadd <2 x half> %select, %x
@@ -1029,61 +776,33 @@ define <2 x half> @add_select_negk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negk_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negk_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negk_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negk_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> <half -1.0, half -1.0>, <2 x half> %fabs
@@ -1140,61 +859,33 @@ define <2 x half> @add_select_negliteralk_fabs_v2f16(<2 x i32> %c, <2 x half> %x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xe400, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xe400, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xe400, v2.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> <half -1024.0, half -1024.0>, <2 x half> %fabs
@@ -1250,61 +941,33 @@ define <2 x half> @add_select_fabs_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_posk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_posk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_posk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_posk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_posk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_posk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> %fabs, <2 x half> <half 1.0, half 1.0>
@@ -1360,61 +1023,33 @@ define <2 x half> @add_select_posk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_posk_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_posk_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_posk_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_posk_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_posk_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_posk_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> <half 1.0, half 1.0>, <2 x half> %fabs
@@ -1470,57 +1105,31 @@ define <2 x half> @add_select_fneg_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fneg.y = fneg <2 x half> %y
@@ -1587,61 +1196,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_lhs_fneg_fneg_v2f16(<2 x
; GFX9-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fneg.y = fneg <2 x half> %y
@@ -1705,61 +1286,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_store_use_lhs_fneg_fneg_v2f1
; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fneg.y = fneg <2 x half> %y
@@ -1828,61 +1381,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_rhs_fneg_fneg_v2f16(<2 x
; GFX9-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fneg.y = fneg <2 x half> %y
@@ -1948,63 +1473,34 @@ define <2 x half> @add_select_fneg_var_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_var_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_var_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_var_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_var_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_var_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_var_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> %y
@@ -2058,55 +1554,30 @@ define <2 x half> @add_select_fneg_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half -1.0, half -1.0>
@@ -2161,55 +1632,30 @@ define <2 x half> @add_select_fneg_inv2pi_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xb118, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xb118, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xb118, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 0xH3118, half 0xH3118>
@@ -2264,55 +1710,30 @@ define <2 x half> @add_select_fneg_neginv2pi_v2f16(<2 x i32> %c, <2 x half> %x,
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3118, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3118, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3118, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 0xHB118, half 0xHB118>
@@ -2363,61 +1784,33 @@ define <2 x half> @add_select_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x) {
; GFX9-NEXT: v_pk_add_f16 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negk_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negk_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negk_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negk_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negk_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negk_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
%add = fadd <2 x half> %select, %x
@@ -2469,61 +1862,33 @@ define <2 x half> @add_select_negliteralk_negliteralk_v2f16(<2 x i32> %c, <2 x h
; GFX9-NEXT: v_pk_add_f16 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xec00
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe800, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xe800
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xec00
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe800, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xe800
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xec00
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe800, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xe800
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%select = select <2 x i1> %cmp, <2 x half> <half -2048.0, half -2048.0>, <2 x half> <half -4096.0, half -4096.0>
%add = fadd <2 x half> %select, %x
@@ -2573,61 +1938,33 @@ define <2 x half> @add_select_fneg_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x)
; GFX9-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
%fneg.x = fneg <2 x half> %select
@@ -2681,55 +2018,30 @@ define <2 x half> @add_select_negk_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negk_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negk_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negk_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negk_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> <half -1.0, half -1.0>, <2 x half> %fneg.x
@@ -2783,55 +2095,30 @@ define <2 x half> @add_select_fneg_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_posk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_posk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_posk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_posk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_posk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_posk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 1.0, half 1.0>
@@ -2885,55 +2172,30 @@ define <2 x half> @add_select_posk_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_posk_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_posk_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_posk_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_posk_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_posk_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_posk_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> <half 1.0, half 1.0>, <2 x half> %fneg.x
@@ -2997,69 +2259,37 @@ define <2 x half> @add_select_negfabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negfabs_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negfabs_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -3125,69 +2355,37 @@ define <2 x half> @add_select_fabs_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v3, 0x80008000, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v3, 0x80008000, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_negfabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_negfabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, 0x80008000, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -3253,69 +2451,37 @@ define <2 x half> @add_select_neg_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_neg_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_neg_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_neg_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_neg_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_neg_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_neg_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -3380,69 +2546,37 @@ define <2 x half> @add_select_fabs_neg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_neg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_neg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_neg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_neg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_neg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_neg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.y = fneg <2 x half> %y
@@ -3501,63 +2635,34 @@ define <2 x half> @add_select_neg_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_neg_negfabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_neg_negfabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -3617,63 +2722,34 @@ define <2 x half> @add_select_negfabs_neg_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.l, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.l, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negfabs_neg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.l, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negfabs_neg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -3735,61 +2811,33 @@ define <2 x half> @mul_select_negfabs_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_select_negfabs_posk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_select_negfabs_posk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -3850,61 +2898,33 @@ define <2 x half> @mul_select_posk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_select_posk_negfabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v2.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_select_posk_negfabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -3965,61 +2985,33 @@ define <2 x half> @mul_select_negfabs_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_select_negfabs_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_select_negfabs_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -4080,61 +3072,33 @@ define <2 x half> @mul_select_negk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_select_negk_negfabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v2.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_select_negk_negfabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -4171,115 +3135,63 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <
; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
; CI-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
-; VI-SAFE: ; %bb.0:
-; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x4400
-; VI-SAFE-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-SAFE-NEXT: v_add_f16_e32 v2, 4.0, v2
-; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1
-; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX9-SAFE: ; %bb.0:
-; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-SAFE-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0]
-; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
-; VI-NSZ: ; %bb.0:
-; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0xc400
-; VI-NSZ-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-NSZ-NEXT: v_sub_f16_e32 v2, -4.0, v2
-; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
-; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
-; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX9-NSZ: ; %bb.0:
-; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-NSZ-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: select_fneg_posk_src_add_v2f16:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_mov_b32_e32 v1, 0x4400
+; VI-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_add_f16_e32 v2, 4.0, v2
+; VI-NEXT: v_or_b32_e32 v1, v2, v1
+; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; VI-NEXT: v_mov_b32_e32 v2, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_add_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_add_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_add_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%add = fadd <2 x half> %x, <half 4.0, half 4.0>
%fneg = fneg <2 x half> %add
@@ -4330,55 +3242,30 @@ define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> %
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%add = fadd nsz <2 x half> %x, <half 4.0, half 4.0>
%fneg = fneg <2 x half> %add
@@ -4387,153 +3274,86 @@ define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> %
}
define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
-; CI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
-; CI-SAFE: ; %bb.0:
-; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT: v_add_f32_e32 v3, -4.0, v3
-; CI-SAFE-NEXT: v_add_f32_e32 v2, -4.0, v2
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3
-; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2
-; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
-; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
-; CI-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
-; VI-SAFE: ; %bb.0:
-; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0xc400
-; VI-SAFE-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-SAFE-NEXT: v_add_f16_e32 v2, -4.0, v2
-; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1
-; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX9-SAFE: ; %bb.0:
-; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-SAFE-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0]
-; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
-; CI-NSZ: ; %bb.0:
-; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NSZ-NEXT: v_sub_f32_e32 v2, 4.0, v2
-; CI-NSZ-NEXT: v_sub_f32_e32 v3, 4.0, v3
-; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
-; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
-; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
-; VI-NSZ: ; %bb.0:
-; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x4400
-; VI-NSZ-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-NSZ-NEXT: v_sub_f16_e32 v2, 4.0, v2
-; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
-; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
-; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX9-NSZ: ; %bb.0:
-; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-NSZ-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; CI-LABEL: select_fneg_posk_src_sub_v2f16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_add_f32_e32 v3, -4.0, v3
+; CI-NEXT: v_add_f32_e32 v2, -4.0, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; CI-NEXT: v_or_b32_e32 v2, v2, v3
+; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v2
+; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: select_fneg_posk_src_sub_v2f16:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_mov_b32_e32 v1, 0xc400
+; VI-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_add_f16_e32 v2, -4.0, v2
+; VI-NEXT: v_or_b32_e32 v1, v2, v1
+; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; VI-NEXT: v_mov_b32_e32 v2, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_sub_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%add = fsub <2 x half> %x, <half 4.0, half 4.0>
%fneg = fneg <2 x half> %add
@@ -4541,6 +3361,80 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
ret <2 x half> %select
}
+define <2 x half> @select_fneg_posk_src_sub_v2f16_nsz(<2 x i32> %c, <2 x half> %x) {
+; CI-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT: v_sub_f32_e32 v2, 4.0, v2
+; CI-NEXT: v_sub_f32_e32 v3, 4.0, v3
+; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_mov_b32_e32 v1, 0x4400
+; VI-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_sub_f16_e32 v2, 4.0, v2
+; VI-NEXT: v_mov_b32_e32 v3, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq <2 x i32> %c, zeroinitializer
+ %add = fsub <2 x half> %x, <half 4.0, half 4.0>
+ %fneg = fneg nsz <2 x half> %add
+ %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> <half 2.0, half 2.0>
+ ret <2 x half> %select
+}
+
define <2 x half> @select_fneg_posk_src_mul_v2f16(<2 x i32> %c, <2 x half> %x) {
; CI-LABEL: select_fneg_posk_src_mul_v2f16:
; CI: ; %bb.0:
@@ -4584,55 +3478,30 @@ define <2 x half> @select_fneg_posk_src_mul_v2f16(<2 x i32> %c, <2 x half> %x) {
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%mul = fmul <2 x half> %x, <half 4.0, half 4.0>
%fneg = fneg <2 x half> %mul
@@ -4668,118 +3537,65 @@ define <2 x half> @select_fneg_posk_src_fma_v2f16(<2 x i32> %c, <2 x half> %x, <
; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
; CI-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SAFE-LABEL: select_fneg_posk_src_fma_v2f16:
-; VI-SAFE: ; %bb.0:
-; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-SAFE-NEXT: v_lshrrev_b32_e32 v1, 16, v3
-; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; VI-SAFE-NEXT: v_fma_f16 v1, v4, 4.0, v1
-; VI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; VI-SAFE-NEXT: v_fma_f16 v2, v2, 4.0, v3
-; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1
-; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-SAFE-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX9-SAFE: ; %bb.0:
-; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-SAFE-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-NSZ-LABEL: select_fneg_posk_src_fma_v2f16:
-; VI-NSZ: ; %bb.0:
-; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-NSZ-NEXT: v_lshrrev_b32_e32 v1, 16, v3
-; VI-NSZ-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; VI-NSZ-NEXT: v_fma_f16 v1, v4, -4.0, -v1
-; VI-NSZ-NEXT: v_fma_f16 v2, v2, -4.0, -v3
-; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
-; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
-; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-NSZ-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX9-NSZ: ; %bb.0:
-; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-NSZ-NEXT: v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: select_fneg_posk_src_fma_v2f16:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v3
+; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; VI-NEXT: v_fma_f16 v1, v4, 4.0, v1
+; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; VI-NEXT: v_fma_f16 v2, v2, 4.0, v3
+; VI-NEXT: v_or_b32_e32 v1, v2, v1
+; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; VI-NEXT: v_mov_b32_e32 v2, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_fma_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
%fneg = fneg <2 x half> %fma
@@ -4817,118 +3633,65 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x,
; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
; CI-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
-; VI-SAFE: ; %bb.0:
-; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-SAFE-NEXT: v_lshrrev_b32_e32 v1, 16, v3
-; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; VI-SAFE-NEXT: v_fma_f16 v1, v4, 4.0, v1
-; VI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; VI-SAFE-NEXT: v_fma_f16 v2, v2, 4.0, v3
-; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1
-; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX9-SAFE: ; %bb.0:
-; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-SAFE-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
-; VI-NSZ: ; %bb.0:
-; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-NSZ-NEXT: v_lshrrev_b32_e32 v1, 16, v3
-; VI-NSZ-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; VI-NSZ-NEXT: v_fma_f16 v1, v4, -4.0, -v1
-; VI-NSZ-NEXT: v_fma_f16 v2, v2, -4.0, -v3
-; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
-; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
-; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX9-NSZ: ; %bb.0:
-; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-NSZ-NEXT: v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: select_fneg_posk_src_fmad_v2f16:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v3
+; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; VI-NEXT: v_fma_f16 v1, v4, 4.0, v1
+; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; VI-NEXT: v_fma_f16 v2, v2, 4.0, v3
+; VI-NEXT: v_or_b32_e32 v1, v2, v1
+; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; VI-NEXT: v_mov_b32_e32 v2, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_fmad_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fmad = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
%fneg = fneg <2 x half> %fmad
@@ -4986,55 +3749,30 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16_nsz(<2 x i32> %c, <2 x half>
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fmad = call nsz <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
%fneg = fneg <2 x half> %fmad
@@ -5049,5 +3787,3 @@ declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX11: {{.*}}
-; GFX11-NSZ: {{.*}}
-; GFX11-SAFE: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir
index 74249c1..e2d3bff 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir
@@ -17,7 +17,7 @@ body: |
; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV32IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s8))
+ ; RV32IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s8))
; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_B]]
; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -42,7 +42,7 @@ body: |
; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV32IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s16))
+ ; RV32IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s16))
; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_H]]
; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -67,7 +67,7 @@ body: |
; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32))
+ ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32))
; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]]
; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -92,7 +92,7 @@ body: |
; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32))
+ ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32))
; RV32IA-ZABHA-NEXT: [[SLTIU:%[0-9]+]]:gpr = SLTIU [[AMOCAS_W]], 1
; RV32IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]]
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir
index a2f7e30..ab537ea 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir
@@ -17,7 +17,7 @@ body: |
; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV64IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s8))
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s8))
; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_B]]
; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -42,7 +42,7 @@ body: |
; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV64IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s16))
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s16))
; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_H]]
; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -67,7 +67,7 @@ body: |
; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV64IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32))
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32))
; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]]
; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -92,7 +92,7 @@ body: |
; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s64))
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s64))
; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_D_RV64_]]
; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -116,7 +116,7 @@ body: |
; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s64))
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s64))
; RV64IA-ZABHA-NEXT: [[SLTIU:%[0-9]+]]:gpr = SLTIU [[AMOCAS_D_RV64_]], 1
; RV64IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_D_RV64_]]
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir
index f7fdc33..e547972 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir
@@ -15,7 +15,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[COPY1]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY1]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -38,7 +38,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[COPY1]] :: (load store monotonic (s16))
+ ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY1]], [[COPY]] :: (load store monotonic (s16))
; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -61,7 +61,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[COPY1]] :: (load store monotonic (s32))
+ ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY1]], [[COPY]] :: (load store monotonic (s32))
; CHECK-NEXT: $x10 = COPY [[AMOADD_W]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -86,7 +86,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -113,7 +113,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[SUB]] :: (load store monotonic (s16))
+ ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[SUB]], [[COPY]] :: (load store monotonic (s16))
; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -140,7 +140,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir
index 178586c..f34826c 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir
@@ -15,7 +15,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[COPY1]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY1]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -38,7 +38,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[COPY1]] :: (load store monotonic (s16))
+ ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY1]], [[COPY]] :: (load store monotonic (s16))
; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -61,7 +61,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[COPY1]] :: (load store monotonic (s32))
+ ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY1]], [[COPY]] :: (load store monotonic (s32))
; CHECK-NEXT: $x10 = COPY [[AMOADD_W]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -84,7 +84,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_D:%[0-9]+]]:gpr = AMOADD_D [[COPY]], [[COPY1]] :: (load store monotonic (s64))
+ ; CHECK-NEXT: [[AMOADD_D:%[0-9]+]]:gpr = AMOADD_D [[COPY1]], [[COPY]] :: (load store monotonic (s64))
; CHECK-NEXT: $x10 = COPY [[AMOADD_D]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -109,7 +109,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -136,7 +136,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[SUB]] :: (load store monotonic (s16))
+ ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[SUB]], [[COPY]] :: (load store monotonic (s16))
; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -163,7 +163,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[SUB]] :: (load store monotonic (s32))
+ ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[SUB]], [[COPY]] :: (load store monotonic (s32))
; CHECK-NEXT: $x10 = COPY [[AMOADD_W]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -190,7 +190,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
diff --git a/llvm/test/CodeGen/SPIRV/llc-pipeline.ll b/llvm/test/CodeGen/SPIRV/llc-pipeline.ll
new file mode 100644
index 0000000..3fff2a8
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/llc-pipeline.ll
@@ -0,0 +1,214 @@
+; UNSUPPORTED:expensive_checks
+; RUN:llc -O0 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \
+; RUN: | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-O0 %s
+; RUN:llc -O1 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \
+; RUN: | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-Opt %s
+; RUN:llc -O2 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \
+; RUN: | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-Opt %s
+; RUN:llc -O3 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \
+; RUN: | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-Opt %s
+;
+; REQUIRES:asserts
+
+; SPIRV-O0:Target Library Information
+; SPIRV-O0-NEXT:Target Pass Configuration
+; SPIRV-O0-NEXT:Machine Module Information
+; SPIRV-O0-NEXT:Target Transform Information
+; SPIRV-O0-NEXT:Create Garbage Collector Module Metadata
+; SPIRV-O0-NEXT:Assumption Cache Tracker
+; SPIRV-O0-NEXT:Profile summary info
+; SPIRV-O0-NEXT:Machine Branch Probability Analysis
+; SPIRV-O0-NEXT: ModulePass Manager
+; SPIRV-O0-NEXT: Pre-ISel Intrinsic Lowering
+; SPIRV-O0-NEXT: FunctionPass Manager
+; SPIRV-O0-NEXT: Expand large div/rem
+; SPIRV-O0-NEXT: Expand fp
+; SPIRV-O0-NEXT: Lower Garbage Collection Instructions
+; SPIRV-O0-NEXT: Shadow Stack GC Lowering
+; SPIRV-O0-NEXT: Remove unreachable blocks from the CFG
+; SPIRV-O0-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
+; SPIRV-O0-NEXT: Scalarize Masked Memory Intrinsics
+; SPIRV-O0-NEXT: Expand reduction intrinsics
+; SPIRV-O0-NEXT: SPIR-V Regularizer
+; SPIRV-O0-NEXT: SPIRV prepare functions
+; SPIRV-O0-NEXT: FunctionPass Manager
+; SPIRV-O0-NEXT: Lower invoke and unwind, for unwindless code generators
+; SPIRV-O0-NEXT: Remove unreachable blocks from the CFG
+; SPIRV-O0-NEXT: SPIRV strip convergent intrinsics
+; SPIRV-O0-NEXT: SPIRV Legalize Implicit Binding
+; SPIRV-O0-NEXT: SPIRV CBuffer Access
+; SPIRV-O0-NEXT: SPIRV emit intrinsics
+; SPIRV-O0-NEXT: FunctionPass Manager
+; SPIRV-O0-NEXT: SPIRV legalize bitcast pass
+; SPIRV-O0-NEXT: Prepare callbr
+; SPIRV-O0-NEXT: Safe Stack instrumentation pass
+; SPIRV-O0-NEXT: Insert stack protectors
+; SPIRV-O0-NEXT: Analysis containing CSE Info
+; SPIRV-O0-NEXT: IRTranslator
+; SPIRV-O0-NEXT: Analysis for ComputingKnownBits
+; SPIRV-O0-NEXT: MachineDominator Tree Construction
+; SPIRV-O0-NEXT: SPIRVPreLegalizerCombiner
+; SPIRV-O0-NEXT: SPIRV pre legalizer
+; SPIRV-O0-NEXT: Analysis containing CSE Info
+; SPIRV-O0-NEXT: Legalizer
+; SPIRV-O0-NEXT: SPIRV post legalizer
+; SPIRV-O0-NEXT: Analysis for ComputingKnownBits
+; SPIRV-O0-NEXT: Dominator Tree Construction
+; SPIRV-O0-NEXT: Natural Loop Information
+; SPIRV-O0-NEXT: Lazy Branch Probability Analysis
+; SPIRV-O0-NEXT: Lazy Block Frequency Analysis
+; SPIRV-O0-NEXT: InstructionSelect
+; SPIRV-O0-NEXT: ResetMachineFunction
+; SPIRV-O0-NEXT: Finalize ISel and expand pseudo-instructions
+; SPIRV-O0-NEXT: Local Stack Slot Allocation
+; SPIRV-O0-NEXT: Remove Redundant DEBUG_VALUE analysis
+; SPIRV-O0-NEXT: Fixup Statepoint Caller Saved
+; SPIRV-O0-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-O0-NEXT: Machine Optimization Remark Emitter
+; SPIRV-O0-NEXT: Prologue/Epilogue Insertion & Frame Finalization
+; SPIRV-O0-NEXT: Post-RA pseudo instruction expansion pass
+; SPIRV-O0-NEXT: Analyze Machine Code For Garbage Collection
+; SPIRV-O0-NEXT: Insert fentry calls
+; SPIRV-O0-NEXT: Insert XRay ops
+; SPIRV-O0-NEXT: Machine Sanitizer Binary Metadata
+; SPIRV-O0-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-O0-NEXT: Machine Optimization Remark Emitter
+; SPIRV-O0-NEXT: Stack Frame Layout Analysis
+; SPIRV-O0-NEXT: SPIRV module analysis
+; SPIRV-O0-NEXT: FunctionPass Manager
+; SPIRV-O0-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-O0-NEXT: Machine Optimization Remark Emitter
+; SPIRV-O0-NEXT: SPIRV Assembly Printer
+; SPIRV-O0-NEXT: Free MachineFunction
+
+; SPIRV-Opt:Target Library Information
+; SPIRV-Opt-NEXT:Target Pass Configuration
+; SPIRV-Opt-NEXT:Machine Module Information
+; SPIRV-Opt-NEXT:Target Transform Information
+; SPIRV-Opt-NEXT:Assumption Cache Tracker
+; SPIRV-Opt-NEXT:Type-Based Alias Analysis
+; SPIRV-Opt-NEXT:Scoped NoAlias Alias Analysis
+; SPIRV-Opt-NEXT:Profile summary info
+; SPIRV-Opt-NEXT:Create Garbage Collector Module Metadata
+; SPIRV-Opt-NEXT:Machine Branch Probability Analysis
+; SPIRV-Opt-NEXT: ModulePass Manager
+; SPIRV-Opt-NEXT: Pre-ISel Intrinsic Lowering
+; SPIRV-Opt-NEXT: FunctionPass Manager
+; SPIRV-Opt-NEXT: Expand large div/rem
+; SPIRV-Opt-NEXT: Expand fp
+; SPIRV-Opt-NEXT: Dominator Tree Construction
+; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl)
+; SPIRV-Opt-NEXT: Natural Loop Information
+; SPIRV-Opt-NEXT: Canonicalize natural loops
+; SPIRV-Opt-NEXT: Scalar Evolution Analysis
+; SPIRV-Opt-NEXT: Loop Pass Manager
+; SPIRV-Opt-NEXT: Canonicalize Freeze Instructions in Loops
+; SPIRV-Opt-NEXT: Induction Variable Users
+; SPIRV-Opt-NEXT: Loop Strength Reduction
+; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl)
+; SPIRV-Opt-NEXT: Function Alias Analysis Results
+; SPIRV-Opt-NEXT: Merge contiguous icmps into a memcmp
+; SPIRV-Opt-NEXT: Natural Loop Information
+; SPIRV-Opt-NEXT: Lazy Branch Probability Analysis
+; SPIRV-Opt-NEXT: Lazy Block Frequency Analysis
+; SPIRV-Opt-NEXT: Expand memcmp() to load/stores
+; SPIRV-Opt-NEXT: Lower Garbage Collection Instructions
+; SPIRV-Opt-NEXT: Shadow Stack GC Lowering
+; SPIRV-Opt-NEXT: Remove unreachable blocks from the CFG
+; SPIRV-Opt-NEXT: Natural Loop Information
+; SPIRV-Opt-NEXT: Post-Dominator Tree Construction
+; SPIRV-Opt-NEXT: Branch Probability Analysis
+; SPIRV-Opt-NEXT: Block Frequency Analysis
+; SPIRV-Opt-NEXT: Constant Hoisting
+; SPIRV-Opt-NEXT: Replace intrinsics with calls to vector library
+; SPIRV-Opt-NEXT: Lazy Branch Probability Analysis
+; SPIRV-Opt-NEXT: Lazy Block Frequency Analysis
+; SPIRV-Opt-NEXT: Optimization Remark Emitter
+; SPIRV-Opt-NEXT: Partially inline calls to library functions
+; SPIRV-Opt-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
+; SPIRV-Opt-NEXT: Scalarize Masked Memory Intrinsics
+; SPIRV-Opt-NEXT: Expand reduction intrinsics
+; SPIRV-Opt-NEXT: SPIR-V Regularizer
+; SPIRV-Opt-NEXT: SPIRV prepare functions
+; SPIRV-Opt-NEXT: FunctionPass Manager
+; SPIRV-Opt-NEXT: Dominator Tree Construction
+; SPIRV-Opt-NEXT: Natural Loop Information
+; SPIRV-Opt-NEXT: CodeGen Prepare
+; SPIRV-Opt-NEXT: Lower invoke and unwind, for unwindless code generators
+; SPIRV-Opt-NEXT: Remove unreachable blocks from the CFG
+; SPIRV-Opt-NEXT: SPIRV strip convergent intrinsics
+; SPIRV-Opt-NEXT: SPIRV Legalize Implicit Binding
+; SPIRV-Opt-NEXT: SPIRV CBuffer Access
+; SPIRV-Opt-NEXT: SPIRV emit intrinsics
+; SPIRV-Opt-NEXT: FunctionPass Manager
+; SPIRV-Opt-NEXT: SPIRV legalize bitcast pass
+; SPIRV-Opt-NEXT: Dominator Tree Construction
+; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl)
+; SPIRV-Opt-NEXT: Function Alias Analysis Results
+; SPIRV-Opt-NEXT: ObjC ARC contraction
+; SPIRV-Opt-NEXT: Prepare callbr
+; SPIRV-Opt-NEXT: Safe Stack instrumentation pass
+; SPIRV-Opt-NEXT: Insert stack protectors
+; SPIRV-Opt-NEXT: Analysis containing CSE Info
+; SPIRV-Opt-NEXT: Natural Loop Information
+; SPIRV-Opt-NEXT: Post-Dominator Tree Construction
+; SPIRV-Opt-NEXT: Branch Probability Analysis
+; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl)
+; SPIRV-Opt-NEXT: Function Alias Analysis Results
+; SPIRV-Opt-NEXT: IRTranslator
+; SPIRV-Opt-NEXT: Analysis for ComputingKnownBits
+; SPIRV-Opt-NEXT: MachineDominator Tree Construction
+; SPIRV-Opt-NEXT: SPIRVPreLegalizerCombiner
+; SPIRV-Opt-NEXT: SPIRV pre legalizer
+; SPIRV-Opt-NEXT: Analysis containing CSE Info
+; SPIRV-Opt-NEXT: Legalizer
+; SPIRV-Opt-NEXT: SPIRV post legalizer
+; SPIRV-Opt-NEXT: Analysis for ComputingKnownBits
+; SPIRV-Opt-NEXT: Lazy Branch Probability Analysis
+; SPIRV-Opt-NEXT: Lazy Block Frequency Analysis
+; SPIRV-Opt-NEXT: InstructionSelect
+; SPIRV-Opt-NEXT: ResetMachineFunction
+; SPIRV-Opt-NEXT: Finalize ISel and expand pseudo-instructions
+; SPIRV-Opt-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Early Tail Duplication
+; SPIRV-Opt-NEXT: Optimize machine instruction PHIs
+; SPIRV-Opt-NEXT: Slot index numbering
+; SPIRV-Opt-NEXT: Merge disjoint stack slots
+; SPIRV-Opt-NEXT: Local Stack Slot Allocation
+; SPIRV-Opt-NEXT: Remove dead machine instructions
+; SPIRV-Opt-NEXT: MachineDominator Tree Construction
+; SPIRV-Opt-NEXT: Machine Natural Loop Construction
+; SPIRV-Opt-NEXT: Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Early Machine Loop Invariant Code Motion
+; SPIRV-Opt-NEXT: MachineDominator Tree Construction
+; SPIRV-Opt-NEXT: Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Machine Common Subexpression Elimination
+; SPIRV-Opt-NEXT: MachinePostDominator Tree Construction
+; SPIRV-Opt-NEXT: Machine Cycle Info Analysis
+; SPIRV-Opt-NEXT: Machine code sinking
+; SPIRV-Opt-NEXT: Peephole Optimizations
+; SPIRV-Opt-NEXT: Remove dead machine instructions
+; SPIRV-Opt-NEXT: Remove Redundant DEBUG_VALUE analysis
+; SPIRV-Opt-NEXT: Fixup Statepoint Caller Saved
+; SPIRV-Opt-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Machine Optimization Remark Emitter
+; SPIRV-Opt-NEXT: Prologue/Epilogue Insertion & Frame Finalization
+; SPIRV-Opt-NEXT: Tail Duplication
+; SPIRV-Opt-NEXT: Post-RA pseudo instruction expansion pass
+; SPIRV-Opt-NEXT: Analyze Machine Code For Garbage Collection
+; SPIRV-Opt-NEXT: Insert fentry calls
+; SPIRV-Opt-NEXT: Insert XRay ops
+; SPIRV-Opt-NEXT: Machine Sanitizer Binary Metadata
+; SPIRV-Opt-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Machine Optimization Remark Emitter
+; SPIRV-Opt-NEXT: Stack Frame Layout Analysis
+; SPIRV-Opt-NEXT: SPIRV module analysis
+; SPIRV-Opt-NEXT: FunctionPass Manager
+; SPIRV-Opt-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Machine Optimization Remark Emitter
+; SPIRV-Opt-NEXT: SPIRV Assembly Printer
+; SPIRV-Opt-NEXT: Free MachineFunction
+
+define void @empty() {
+ ret void
+}
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index 52f57dc..a8d37be 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -434,7 +434,6 @@ entry:
define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-LABEL: stest_f16i16:
; CHECK: .functype stest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT: .local v128, v128, v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 5
; CHECK-NEXT: call __truncsfhf2
@@ -474,15 +473,6 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.replace_lane 3
-; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767
-; CHECK-NEXT: local.tee 8
-; CHECK-NEXT: i32x4.min_s
-; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768
-; CHECK-NEXT: local.tee 9
-; CHECK-NEXT: i32x4.max_s
-; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT: local.tee 10
-; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.splat
@@ -495,13 +485,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.replace_lane 3
-; CHECK-NEXT: local.get 8
-; CHECK-NEXT: i32x4.min_s
-; CHECK-NEXT: local.get 9
-; CHECK-NEXT: i32x4.max_s
-; CHECK-NEXT: local.get 10
-; CHECK-NEXT: v128.and
-; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i16x8.narrow_i32x4_s
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <8 x half> %x to <8 x i32>
@@ -516,7 +500,6 @@ entry:
define <8 x i16> @utest_f16i16(<8 x half> %x) {
; CHECK-LABEL: utest_f16i16:
; CHECK: .functype utest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT: .local v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 5
; CHECK-NEXT: call __truncsfhf2
@@ -556,9 +539,6 @@ define <8 x i16> @utest_f16i16(<8 x half> %x) {
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.replace_lane 3
-; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT: local.tee 8
-; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.splat
@@ -571,8 +551,6 @@ define <8 x i16> @utest_f16i16(<8 x half> %x) {
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.replace_lane 3
-; CHECK-NEXT: local.get 8
-; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: # fallthrough-return
entry:
@@ -1861,7 +1839,6 @@ entry:
define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-LABEL: stest_f16i16_mm:
; CHECK: .functype stest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT: .local v128, v128, v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 5
; CHECK-NEXT: call __truncsfhf2
@@ -1901,15 +1878,6 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.replace_lane 3
-; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767
-; CHECK-NEXT: local.tee 8
-; CHECK-NEXT: i32x4.min_s
-; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768
-; CHECK-NEXT: local.tee 9
-; CHECK-NEXT: i32x4.max_s
-; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT: local.tee 10
-; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.splat
@@ -1922,13 +1890,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.replace_lane 3
-; CHECK-NEXT: local.get 8
-; CHECK-NEXT: i32x4.min_s
-; CHECK-NEXT: local.get 9
-; CHECK-NEXT: i32x4.max_s
-; CHECK-NEXT: local.get 10
-; CHECK-NEXT: v128.and
-; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i16x8.narrow_i32x4_s
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <8 x half> %x to <8 x i32>
@@ -1941,7 +1903,6 @@ entry:
define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
; CHECK-LABEL: utest_f16i16_mm:
; CHECK: .functype utest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT: .local v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 5
; CHECK-NEXT: call __truncsfhf2
@@ -1981,9 +1942,6 @@ define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.replace_lane 3
-; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT: local.tee 8
-; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.splat
@@ -1996,8 +1954,6 @@ define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.replace_lane 3
-; CHECK-NEXT: local.get 8
-; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: # fallthrough-return
entry:
diff --git a/llvm/test/CodeGen/WebAssembly/saturating-truncation.ll b/llvm/test/CodeGen/WebAssembly/saturating-truncation.ll
new file mode 100644
index 0000000..f3f3ba9
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/saturating-truncation.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) #2
+declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) #2
+
+define <16 x i8> @i16_signed(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: i16_signed:
+; CHECK: .functype i16_signed (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %bb2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i8x16.narrow_i16x8_s
+; CHECK-NEXT: # fallthrough-return
+bb2:
+ %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %0, <16 x i16> splat (i16 -128))
+ %2 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %1, <16 x i16> splat (i16 127))
+ %3 = trunc nsw <16 x i16> %2 to <16 x i8>
+ ret <16 x i8> %3
+ ret <16 x i8> %3
+}
+
+define <8 x i16> @i32_signed(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: i32_signed:
+; CHECK: .functype i32_signed (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %bb2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i16x8.narrow_i32x4_s
+; CHECK-NEXT: # fallthrough-return
+bb2:
+ %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> %0, <8 x i32> splat (i32 -32768))
+ %2 = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %1, <8 x i32> splat (i32 32767))
+ %3 = trunc nsw <8 x i32> %2 to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @i32_signed_flipped(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: i32_signed_flipped:
+; CHECK: .functype i32_signed_flipped (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %bb2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i16x8.narrow_i32x4_s
+; CHECK-NEXT: # fallthrough-return
+bb2:
+ %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> splat (i32 32767), <8 x i32> %0)
+ %2 = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> splat (i32 -32768), <8 x i32> %1)
+ %3 = trunc nsw <8 x i32> %2 to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <16 x i8> @i16_unsigned(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: i16_unsigned:
+; CHECK: .functype i16_unsigned (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %bb2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: # fallthrough-return
+bb2:
+ %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %0, <16 x i16> splat (i16 255))
+ %2 = trunc nuw <16 x i16> %1 to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @i32_unsigned(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: i32_unsigned:
+; CHECK: .functype i32_unsigned (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %bb2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: # fallthrough-return
+bb2:
+ %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = tail call <8 x i32> @llvm.umin.v8i32(<8 x i32> %0, <8 x i32> splat (i32 65535))
+ %2 = trunc nsw <8 x i32> %1 to <8 x i16>
+ ret <8 x i16> %2
+}
diff --git a/llvm/test/CodeGen/X86/ptrtoaddr-fast-isel.ll b/llvm/test/CodeGen/X86/ptrtoaddr-fast-isel.ll
new file mode 100644
index 0000000..c302d41
--- /dev/null
+++ b/llvm/test/CodeGen/X86/ptrtoaddr-fast-isel.ll
@@ -0,0 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -fast-isel -fast-isel-abort=1 < %s -o - | FileCheck %s
+
+define i64 @ptrtoaddr(ptr %p) {
+; CHECK-LABEL: ptrtoaddr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
+ %addr = ptrtoaddr ptr %p to i64
+ ret i64 %addr
+}
diff --git a/llvm/test/DebugInfo/X86/instr-ref-opt-bisect2.ll b/llvm/test/DebugInfo/X86/instr-ref-opt-bisect2.ll
new file mode 100644
index 0000000..92aedfe
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/instr-ref-opt-bisect2.ll
@@ -0,0 +1,36 @@
+; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=1 | FileCheck %s
+; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=10 | FileCheck %s
+; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=100 | FileCheck %s
+
+; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=1 -fast-isel=true | FileCheck %s
+; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=10 -fast-isel=true | FileCheck %s
+; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=100 -fast-isel=true | FileCheck %s
+
+; This test has the same purpose as the instr-ref-opt-bisect.ll, to check if
+; during opt-bisect's optimisation level change we won't run into an assert.
+; This is simply testing different IR.
+
+; CHECK: DBG_VALUE
+
+target triple = "x86_64-pc-windows-msvc"
+
+define i1 @foo(i32 %arg) !dbg !3 {
+entry:
+ #dbg_value(i32 %arg, !4, !DIExpression(), !5)
+ switch i32 %arg, label %bb [
+ i32 810, label %bb
+ ], !dbg !5
+bb:
+ %a = load volatile i1, ptr null, align 1
+ ret i1 false
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
+!1 = !DIFile(filename: "instr-ref-opt-bisect2.ll", directory: ".")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "instr-ref-opt-bisect2", file: !1, unit: !0)
+!4 = !DILocalVariable(name: "arg", arg: 2, scope: !3)
+!5 = !DILocation(line: 0, scope: !3)
diff --git a/llvm/test/MC/AArch64/armv9a-sysp-diagnostics.s b/llvm/test/MC/AArch64/armv9a-sysp-diagnostics.s
new file mode 100644
index 0000000..f8baf37
--- /dev/null
+++ b/llvm/test/MC/AArch64/armv9a-sysp-diagnostics.s
@@ -0,0 +1,95 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK-ERROR
+
+tlbip ALLE1
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE1IS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE1ISNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE1NXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE1OS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE1OSNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE2
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE2IS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE2ISNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE2NXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE2OS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE2OSNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE3
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE3IS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE3ISNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE3NXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE3OS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE3OSNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ASIDE1
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ASIDE1IS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ASIDE1ISNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ASIDE1NXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ASIDE1OS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ASIDE1OSNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip PAALL
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip PAALLOS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip RPALOS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip RPAOS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLE1
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLE1IS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLE1ISNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLE1NXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLE1OS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLE1OSNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLS12E1
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLS12E1IS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLS12E1ISNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLS12E1NXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLS12E1OS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLS12E1OSNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLWS2E1
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLWS2E1IS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLWS2E1ISNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLWS2E1NXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLWS2E1OS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLWS2E1OSNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s b/llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s
index a21f762..73653d0 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s
@@ -1,64 +1,71 @@
-// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefix=GFX1250 %s
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefixes=GFX1250,GFX1250-ASM %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250,GFX1250-DIS %s
s_mov_b64 s[2:3], 0x10abcdef12345678
-// GFX1250: s_mov_b64 s[2:3], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x01,0x82,0xbe,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_mov_b64 s[2:3], 0x10abcdef12345678 ; encoding: [0xfe,0x01,0x82,0xbe,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_add_nc_u64 s[2:3], s[4:5], 0x10abcdef12345678
-// GFX1250: s_add_nc_u64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0xa9,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_add_nc_u64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0xa9,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_mul_u64 s[2:3], 0x10abcdef12345678, s[4:5]
-// GFX1250: s_mul_u64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0xaa,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_mul_u64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0xaa,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_and_b64 s[2:3], 0x10abcdef12345678, s[4:5]
-// GFX1250: s_and_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x8b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_and_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x8b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_or_b64 s[2:3], s[4:5], 0x10abcdef12345678
-// GFX1250: s_or_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_or_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_xor_b64 s[2:3], 0x10abcdef12345678, s[4:5]
-// GFX1250: s_xor_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x8d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_xor_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x8d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_and_not1_b64 s[2:3], 0x10abcdef12345678, 0x10abcdef12345678
-// GFX1250: s_and_not1_b64 s[2:3], lit64(0x10abcdef12345678), lit64(0x10abcdef12345678) ; encoding: [0xfe,0xfe,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_and_not1_b64 s[2:3], 0x10abcdef12345678, 0x10abcdef12345678 ; encoding: [0xfe,0xfe,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_or_not1_b64 s[2:3], s[4:5], 0x10abcdef12345678
-// GFX1250: s_or_not1_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_or_not1_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_andn2_b64 s[2:3], 0x10abcdef12345678, s[4:5]
-// GFX1250: s_and_not1_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_and_not1_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_orn2_b64 s[2:3], s[4:5], 0x10abcdef12345678
-// GFX1250: s_or_not1_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_or_not1_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_nand_b64 s[2:3], s[4:5], 0x10abcdef12345678
-// GFX1250: s_nand_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8e,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_nand_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8e,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_nor_b64 s[2:3], s[4:5], 0x10abcdef12345678
-// GFX1250: s_nor_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_nor_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_xnor_b64 s[2:3], s[4:5], 0x10abcdef12345678
-// GFX1250: s_xnor_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_xnor_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_lshl_b64 s[2:3], 0x10abcdef12345678, s4
-// GFX1250: s_lshl_b64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x84,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_lshl_b64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x84,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_lshr_b64 s[2:3], 0x10abcdef12345678, s4
-// GFX1250: s_lshr_b64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x85,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_lshr_b64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x85,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_ashr_i64 s[2:3], 0x10abcdef12345678, s4
-// GFX1250: s_ashr_i64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x86,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_ashr_i64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x86,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_bfe_u64 s[2:3], 0x10abcdef12345678, 5
-// GFX1250: s_bfe_u64 s[2:3], lit64(0x10abcdef12345678), 5 ; encoding: [0xfe,0x85,0x02,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_bfe_u64 s[2:3], 0x10abcdef12345678, 5 ; encoding: [0xfe,0x85,0x02,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_bfe_i64 s[2:3], 0x80abcdef12345678, 5
-// GFX1250: s_bfe_i64 s[2:3], lit64(0x80abcdef12345678), 5 ; encoding: [0xfe,0x85,0x82,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x80]
+// GFX1250: s_bfe_i64 s[2:3], 0x80abcdef12345678, 5 ; encoding: [0xfe,0x85,0x82,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x80]
s_cselect_b64 s[2:3], s[4:5], 0x10abcdef12345678
-// GFX1250: s_cselect_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x98,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_cselect_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x98,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
s_mov_b64 s[2:3], 0xffffffff01234567
-// GFX1250: s_mov_b64 s[2:3], lit64(0xffffffff01234567) ; encoding: [0xfe,0x01,0x82,0xbe,0x67,0x45,0x23,0x01,0xff,0xff,0xff,0xff]
+// GFX1250: s_mov_b64 s[2:3], 0xffffffff01234567 ; encoding: [0xfe,0x01,0x82,0xbe,0x67,0x45,0x23,0x01,0xff,0xff,0xff,0xff]
+// TODO: disasm
s_mov_b64 s[2:3], lit64(0x777)
-// GFX1250: s_mov_b64 s[2:3], 0x777 ; encoding: [0xff,0x01,0x82,0xbe,0x77,0x07,0x00,0x00]
+// GFX1250-ASM: s_mov_b64 s[2:3], lit64(0x777) ; encoding: [0xfe,0x01,0x82,0xbe,0x77,0x07,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX1250-DIS: s_mov_b64 s[2:3], 0x777 ; encoding: [0xff,0x01,0x82,0xbe,0x77,0x07,0x00,0x00]
+
+s_mov_b64 s[2:3], 0x777
+// GFX1250: s_mov_b64 s[2:3], 0x777 ; encoding: [0xff,0x01,0x82,0xbe,0x77,0x07,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s
index 5cf484f..cc351af 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s
@@ -1,61 +1,63 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
s_add_pc_i64 s[2:3]
// GFX1250: s_add_pc_i64 s[2:3] ; encoding: [0x02,0x4b,0x80,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
s_add_pc_i64 4
// GFX1250: s_add_pc_i64 4 ; encoding: [0x84,0x4b,0x80,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
s_add_pc_i64 100
// GFX1250: s_add_pc_i64 0x64 ; encoding: [0xff,0x4b,0x80,0xbe,0x64,0x00,0x00,0x00]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
s_add_pc_i64 0x12345678abcd0
-// GFX1250: s_add_pc_i64 lit64(0x12345678abcd0) ; encoding: [0xfe,0x4b,0x80,0xbe,0xd0,0xbc,0x8a,0x67,0x45,0x23,0x01,0x00]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX1250: s_add_pc_i64 0x12345678abcd0 ; encoding: [0xfe,0x4b,0x80,0xbe,0xd0,0xbc,0x8a,0x67,0x45,0x23,0x01,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
s_get_pc_i64 s[2:3]
// GFX1250: s_get_pc_i64 s[2:3] ; encoding: [0x00,0x47,0x82,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
s_getpc_b64 s[2:3]
// GFX1250: s_get_pc_i64 s[2:3] ; encoding: [0x00,0x47,0x82,0xbe]
s_set_pc_i64 s[2:3]
// GFX1250: s_set_pc_i64 s[2:3] ; encoding: [0x02,0x48,0x80,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
s_setpc_b64 s[2:3]
// GFX1250: s_set_pc_i64 s[2:3] ; encoding: [0x02,0x48,0x80,0xbe]
s_swap_pc_i64 s[2:3], 10
// GFX1250: s_swap_pc_i64 s[2:3], 10 ; encoding: [0x8a,0x49,0x82,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
s_swappc_b64 s[2:3], 10
// GFX1250: s_swap_pc_i64 s[2:3], 10 ; encoding: [0x8a,0x49,0x82,0xbe]
s_rfe_i64 s[2:3]
// GFX1250: s_rfe_i64 s[2:3] ; encoding: [0x02,0x4a,0x80,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
s_rfe_b64 s[2:3]
// GFX1250: s_rfe_i64 s[2:3] ; encoding: [0x02,0x4a,0x80,0xbe]
s_sendmsg_rtn_b32 s2, sendmsg(MSG_RTN_GET_CLUSTER_BARRIER_STATE)
// GFX1250: s_sendmsg_rtn_b32 s2, sendmsg(MSG_RTN_GET_CLUSTER_BARRIER_STATE) ; encoding: [0x88,0x4c,0x82,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: specified message id is not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:31: error: specified message id is not supported on this GPU
s_sendmsg_rtn_b64 s[2:3], sendmsg(MSG_RTN_GET_CLUSTER_BARRIER_STATE)
// GFX1250: s_sendmsg_rtn_b64 s[2:3], sendmsg(MSG_RTN_GET_CLUSTER_BARRIER_STATE) ; encoding: [0x88,0x4d,0x82,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: specified message id is not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:35: error: specified message id is not supported on this GPU
s_get_shader_cycles_u64 s[2:3]
// GFX1250: s_get_shader_cycles_u64 s[2:3] ; encoding: [0x00,0x06,0x82,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
s_barrier_signal -3
// GFX1250: s_barrier_signal -3 ; encoding: [0xc3,0x4e,0x80,0xbe]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_valu_lit64.s b/llvm/test/MC/AMDGPU/gfx1250_asm_valu_lit64.s
index 7395a51..58da119 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_valu_lit64.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_valu_lit64.s
@@ -1,211 +1,213 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s
v_ceil_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x30,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_ceil_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x30,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cvt_f32_f64 v255, 0x10abcdef12345678
-// GFX1250: v_cvt_f32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x1e,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cvt_f32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x1e,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cvt_i32_f64 v255, 0x10abcdef12345678
-// GFX1250: v_cvt_i32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x06,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cvt_i32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x06,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cvt_u32_f64 v255, 0x10abcdef12345678
-// GFX1250: v_cvt_u32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x2a,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cvt_u32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x2a,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_floor_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_floor_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x34,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_floor_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x34,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_fract_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_fract_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x7c,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_fract_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x7c,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_frexp_exp_i32_f64 v255, 0x10abcdef12345678
-// GFX1250: v_frexp_exp_i32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x78,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_frexp_exp_i32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x78,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_frexp_mant_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_frexp_mant_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x7a,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_frexp_mant_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x7a,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_rcp_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_rcp_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x5e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_rcp_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x5e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_rndne_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_rndne_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x32,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_rndne_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x32,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_rsq_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_rsq_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x62,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_rsq_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x62,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_sqrt_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_sqrt_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x68,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_sqrt_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x68,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_trunc_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_trunc_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x2e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_trunc_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x2e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_add_f64 v[254:255], 0x10abcdef12345678, v[254:255]
-// GFX1250: v_add_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x05,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_add_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x05,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_max_num_f64 v[254:255], 0x10abcdef12345678, v[254:255]
-// GFX1250: v_max_num_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_max_num_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_min_num_f64 v[254:255], 0x10abcdef12345678, v[254:255]
-// GFX1250: v_min_num_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_min_num_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_mul_f64 v[254:255], 0x10abcdef12345678, v[254:255]
-// GFX1250: v_mul_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x0d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_mul_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x0d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_class_f64 vcc_lo, 0x10abcdef12345678, v255
-// GFX1250: v_cmp_class_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v255 ; encoding: [0xfe,0xfe,0xff,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_class_f64_e32 vcc_lo, 0x10abcdef12345678, v255 ; encoding: [0xfe,0xfe,0xff,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_eq_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_eq_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_eq_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_ge_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_ge_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_ge_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_gt_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_gt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_gt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_gt_i64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_gt_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_gt_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_gt_u64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_gt_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_gt_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_le_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_le_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_le_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_le_i64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_le_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_le_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_le_u64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_le_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_le_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_lg_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_lg_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_lg_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_lt_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_lt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_lt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_lt_i64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_lt_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_lt_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_lt_u64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_lt_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_lt_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_ne_i64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_ne_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_ne_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_ne_u64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_ne_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_ne_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_neq_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_neq_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_neq_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_nge_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_nge_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_nge_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_ngt_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_ngt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_ngt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_nle_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_nle_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_nle_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_nlg_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_nlg_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_nlg_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_nlt_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_nlt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_nlt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_o_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_o_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_o_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmp_u_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_u_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_u_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_class_f64 0x10abcdef12345678, v255
-// GFX1250: v_cmpx_class_f64_e32 lit64(0x10abcdef12345678), v255 ; encoding: [0xfe,0xfe,0xff,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_class_f64_e32 0x10abcdef12345678, v255 ; encoding: [0xfe,0xfe,0xff,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_eq_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_eq_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_eq_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_eq_i64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_eq_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_eq_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_eq_u64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_eq_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_eq_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_ge_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_ge_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_ge_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_ge_i64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_ge_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xad,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_ge_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xad,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_ge_u64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_ge_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbd,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_ge_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbd,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_gt_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_gt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_gt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_gt_i64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_gt_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_gt_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_gt_u64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_gt_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_gt_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_le_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_le_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_le_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_le_i64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_le_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_le_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_le_u64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_le_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_le_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_lg_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_lg_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_lg_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_lt_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_lt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_lt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_lt_i64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_lt_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_lt_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_lt_u64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_lt_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_lt_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_ne_i64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_ne_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_ne_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_ne_u64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_ne_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_ne_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_neq_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_neq_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_neq_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_nge_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_nge_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_nge_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_ngt_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_ngt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_ngt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_nle_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_nle_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_nle_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_nlg_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_nlg_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_nlg_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_nlt_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_nlt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_nlt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_o_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_o_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_o_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_cmpx_u_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_u_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_u_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
v_ceil_f64 v[254:255], 153.1
-// GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x4063233333333333) ; encoding: [0xfe,0x30,0xfc,0x7f,0x33,0x33,0x33,0x33,0x33,0x23,0x63,0x40]
+// GFX1250: v_ceil_f64_e32 v[254:255], 0x4063233333333333 ; encoding: [0xfe,0x30,0xfc,0x7f,0x33,0x33,0x33,0x33,0x33,0x23,0x63,0x40]
v_ceil_f64 v[254:255], 1.5e22
-// GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x448969368974c05b) ; encoding: [0xfe,0x30,0xfc,0x7f,0x5b,0xc0,0x74,0x89,0x36,0x69,0x89,0x44]
+// GFX1250: v_ceil_f64_e32 v[254:255], 0x448969368974c05b ; encoding: [0xfe,0x30,0xfc,0x7f,0x5b,0xc0,0x74,0x89,0x36,0x69,0x89,0x44]
// These 64-bit literals can be represented as 32-bit with encoding 255. HW behavior:
// 64 bit float: the lower 32-bit are padded with zero
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index 811c6eb..6950c72 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -26,7 +26,7 @@ v_mov_b64 v[4:5], 0.5
// GFX1250: v_mov_b64_e32 v[4:5], 0.5 ; encoding: [0xf0,0x3a,0x08,0x7e]
v_mov_b64 v[254:255], 0xaf123456
-// GFX1250: v_mov_b64_e32 v[254:255], lit64(0xaf123456) ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: v_mov_b64_e32 v[254:255], 0xaf123456 ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
v_tanh_f32 v5, v1
// GFX1250: v_tanh_f32_e32 v5, v1 ; encoding: [0x01,0x3d,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 40fcd6f..0d61c1f 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -1,5 +1,6 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s
v_mov_b64_e32 v[4:5], v[2:3]
// GFX1250: v_mov_b64_e32 v[4:5], v[2:3] ; encoding: [0x02,0x3b,0x08,0x7e]
@@ -26,7 +27,7 @@ v_mov_b64 v[4:5], 0.5
// GFX1250: v_mov_b64_e32 v[4:5], 0.5 ; encoding: [0xf0,0x3a,0x08,0x7e]
v_mov_b64 v[254:255], 0xaf123456
-// GFX1250: v_mov_b64_e32 v[254:255], lit64(0xaf123456) ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: v_mov_b64_e32 v[254:255], 0xaf123456 ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
v_tanh_f32 v5, v1
// GFX1250: v_tanh_f32_e32 v5, v1 ; encoding: [0x01,0x3d,0x0a,0x7e]
@@ -628,8 +629,8 @@ v_cvt_f16_fp8 v1.l, 0x1234
v_cvt_f16_fp8 v1.h, v2
// GFX1250: v_cvt_f16_fp8_e32 v1.h, v2 ; encoding: [0x02,0xef,0x02,0x7f]
-v_cvt_pk_f16_bf8 v1, v2
-// GFX1250: v_cvt_pk_f16_bf8 v1, v2 ; encoding: [0x02,0xed,0x02,0x7e]
+v_cvt_pk_f16_bf8 v1, v2.l
+// GFX1250: v_cvt_pk_f16_bf8 v1, v2.l ; encoding: [0x02,0xed,0x02,0x7e]
v_cvt_pk_f16_bf8 v1, s2
// GFX1250: v_cvt_pk_f16_bf8 v1, s2 ; encoding: [0x02,0xec,0x02,0x7e]
@@ -637,8 +638,8 @@ v_cvt_pk_f16_bf8 v1, s2
v_cvt_pk_f16_bf8 v1, 100
// GFX1250: v_cvt_pk_f16_bf8 v1, 0x64 ; encoding: [0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00]
-v_cvt_pk_f16_fp8 v1, v2
-// GFX1250: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e]
+v_cvt_pk_f16_fp8 v1, v2.l
+// GFX1250: v_cvt_pk_f16_fp8 v1, v2.l ; encoding: [0x02,0xeb,0x02,0x7e]
v_cvt_pk_f16_fp8 v1, s2
// GFX1250: v_cvt_pk_f16_fp8 v1, s2 ; encoding: [0x02,0xea,0x02,0x7e]
@@ -694,8 +695,8 @@ v_cvt_pk_f32_fp8_e32 v[2:3], s3
v_cvt_pk_f32_fp8_e32 v[2:3], 3
// GFX1250: v_cvt_pk_f32_fp8_e32 v[2:3], 3 ; encoding: [0x83,0xdc,0x04,0x7e]
-v_cvt_pk_f32_fp8_e32 v[2:3], v3
-// GFX1250: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xdd,0x04,0x7e]
+v_cvt_pk_f32_fp8_e32 v[2:3], v3.l
+// GFX1250: v_cvt_pk_f32_fp8_e32 v[2:3], v3.l ; encoding: [0x03,0xdd,0x04,0x7e]
v_cvt_pk_f32_fp8_e32 v[4:5], v127.h
// GFX1250: v_cvt_pk_f32_fp8_e32 v[4:5], v127.h ; encoding: [0xff,0xdd,0x08,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s
index 0a1d3bf..02872b0 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s
@@ -1,5 +1,6 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX1250,GFX1250-ASM %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250,GFX1250-DIS %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200-ERR --implicit-check-not=error: %s
v_fmac_f64 v[4:5], v[2:3], v[4:5]
@@ -195,7 +196,7 @@ v_add_nc_u64 v[4:5], -4.0, v[4:5]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_add_nc_u64 v[4:5], 0xaf123456, v[4:5]
-// GFX1250: v_add_nc_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x50,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: v_add_nc_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x50,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_add_nc_u64 v[4:5], 0x3f717273, v[4:5]
@@ -315,7 +316,7 @@ v_sub_nc_u64 v[4:5], -4.0, v[4:5]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_sub_nc_u64 v[4:5], 0xaf123456, v[4:5]
-// GFX1250: v_sub_nc_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x52,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: v_sub_nc_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x52,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_sub_nc_u64 v[4:5], 0x3f717273, v[4:5]
@@ -435,7 +436,7 @@ v_mul_u64 v[4:5], -4.0, v[4:5]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_mul_u64 v[4:5], 0xaf123456, v[4:5]
-// GFX1250: v_mul_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x54,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: v_mul_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x54,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_mul_u64 v[4:5], 0x3f717273, v[4:5]
@@ -519,7 +520,7 @@ v_fmamk_f64 v[6:7], v[254:255], 0x405ec00000000000, v[2:3]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_fmamk_f64 v[6:7], s[2:3], 0x405ec00012345678, v[2:3]
-// GFX1250: v_fmamk_f64 v[6:7], s[2:3], lit64(0x405ec00012345678), v[2:3] ; encoding: [0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+// GFX1250: v_fmamk_f64 v[6:7], s[2:3], 0x405ec00012345678, v[2:3] ; encoding: [0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_fmamk_f64 v[6:7], vcc, 0x405ec000, v[2:3]
@@ -551,7 +552,7 @@ v_fmamk_f64 v[254:255], 0x405ec000, 0x405ec000, v[254:255]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_fmamk_f64 v[254:255], 0x405ec00012345678, 0x405ec00012345678, v[254:255]
-// GFX1250: v_fmamk_f64 v[254:255], lit64(0x405ec00012345678), lit64(0x405ec00012345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+// GFX1250: v_fmamk_f64 v[254:255], 0x405ec00012345678, 0x405ec00012345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_fmamk_f64 v[254:255], 123.0, 0x405ec000, v[2:3]
@@ -559,15 +560,15 @@ v_fmamk_f64 v[254:255], 123.0, 0x405ec000, v[2:3]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_fmamk_f64 v[4:5], v[2:3], 123.1, v[6:7]
-// GFX1250: v_fmamk_f64 v[4:5], v[2:3], lit64(0x405ec66666666666), v[6:7] ; encoding: [0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+// GFX1250: v_fmamk_f64 v[4:5], v[2:3], 0x405ec66666666666, v[6:7] ; encoding: [0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_fmamk_f64 v[4:5], 0x405ec66666666666, 123.1, v[6:7]
-// GFX1250: v_fmamk_f64 v[4:5], lit64(0x405ec66666666666), lit64(0x405ec66666666666), v[6:7] ; encoding: [0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+// GFX1250: v_fmamk_f64 v[4:5], 0x405ec66666666666, 0x405ec66666666666, v[6:7] ; encoding: [0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_fmamk_f64 v[4:5], 123.1, 123.1, v[8:9]
-// GFX1250: v_fmamk_f64 v[4:5], lit64(0x405ec66666666666), lit64(0x405ec66666666666), v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+// GFX1250: v_fmamk_f64 v[4:5], 0x405ec66666666666, 0x405ec66666666666, v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_fmamk_f64 v[4:5], 1.0, 1.0, v[6:7]
@@ -595,7 +596,7 @@ v_fmaak_f64 v[6:7], v[254:255], v[8:9], 0x405ec00000000000
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_fmaak_f64 v[6:7], s[2:3], v[8:9], 0x405ec00012345678
-// GFX1250: v_fmaak_f64 v[6:7], s[2:3], v[8:9], lit64(0x405ec00012345678) ; encoding: [0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+// GFX1250: v_fmaak_f64 v[6:7], s[2:3], v[8:9], 0x405ec00012345678 ; encoding: [0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_fmaak_f64 v[6:7], vcc, v[8:9], 0x405ec000
@@ -631,27 +632,28 @@ v_fmaak_f64 v[254:255], 0x405ec00000000000, v[254:255], 0x405ec00000000000
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_fmaak_f64 v[254:255], 0x405ec00012345678, v[254:255], 0x405ec00012345678
-// GFX1250: v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit64(0x405ec00012345678) ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+// GFX1250: v_fmaak_f64 v[254:255], 0x405ec00012345678, v[254:255], 0x405ec00012345678 ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit(0x405ec00012345678)
-// GFX1250: v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit64(0x405ec00012345678) ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
-// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250-ASM: v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit(0x405ec00012345678) ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+// GFX1250-DIS: v_fmaak_f64 v[254:255], 0x405ec00012345678, v[254:255], 0x405ec00012345678 ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+// GFX1200-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
v_fmaak_f64 v[254:255], 123.0, v[2:3], 0x405ec000
// GFX1250: v_fmaak_f64 v[254:255], 0x405ec000, v[2:3], 0x405ec000 ; encoding: [0xfe,0x04,0xfc,0x49,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_fmaak_f64 v[4:5], v[2:3], v[2:3], 123.1
-// GFX1250: v_fmaak_f64 v[4:5], v[2:3], v[2:3], lit64(0x405ec66666666666) ; encoding: [0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+// GFX1250: v_fmaak_f64 v[4:5], v[2:3], v[2:3], 0x405ec66666666666 ; encoding: [0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_fmaak_f64 v[4:5], 0x405ec66666666666, v[6:7], 123.1
-// GFX1250: v_fmaak_f64 v[4:5], lit64(0x405ec66666666666), v[6:7], lit64(0x405ec66666666666) ; encoding: [0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+// GFX1250: v_fmaak_f64 v[4:5], 0x405ec66666666666, v[6:7], 0x405ec66666666666 ; encoding: [0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_fmaak_f64 v[4:5], 123.1, v[8:9], 123.1
-// GFX1250: v_fmaak_f64 v[4:5], lit64(0x405ec66666666666), v[8:9], lit64(0x405ec66666666666) ; encoding: [0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+// GFX1250: v_fmaak_f64 v[4:5], 0x405ec66666666666, v[8:9], 0x405ec66666666666 ; encoding: [0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
v_fmaak_f64 v[4:5], 1.0, v[8:9], 1.0
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
index 949847e..ad5771b 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
@@ -1,6 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefixes=GFX12,GFX1200 %s
// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefixes=GFX12,GFX1250 %s
+// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s
s_alloc_vgpr 0x1235
// GFX12: s_alloc_vgpr 0x1235 ; encoding: [0xff,0x53,0x80,0xbe,0x35,0x12,0x00,0x00]
@@ -859,7 +860,7 @@ s_mov_b64 s[0:1], 0x3f717273
s_mov_b64 s[0:1], 0xaf123456
// GFX1200: s_mov_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_mov_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_mov_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_mov_b64 s[0:1], null
// GFX12: s_mov_b64 s[0:1], null ; encoding: [0x7c,0x01,0x80,0xbe]
@@ -968,7 +969,7 @@ s_cmov_b64 s[0:1], 0x3f717273
s_cmov_b64 s[0:1], 0xaf123456
// GFX1200: s_cmov_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_cmov_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_cmov_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_not_b32 s0, s1
// GFX12: s_not_b32 s0, s1 ; encoding: [0x01,0x1e,0x80,0xbe]
@@ -1071,7 +1072,7 @@ s_not_b64 s[0:1], 0x3f717273
s_not_b64 s[0:1], 0xaf123456
// GFX1200: s_not_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_not_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_not_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_wqm_b32 s0, s1
// GFX12: s_wqm_b32 s0, s1 ; encoding: [0x01,0x1c,0x80,0xbe]
@@ -1174,7 +1175,7 @@ s_wqm_b64 s[0:1], 0x3f717273
s_wqm_b64 s[0:1], 0xaf123456
// GFX1200: s_wqm_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_wqm_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_wqm_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_brev_b32 s0, s1
// GFX12: s_brev_b32 s0, s1 ; encoding: [0x01,0x04,0x80,0xbe]
@@ -1277,7 +1278,7 @@ s_brev_b64 s[0:1], 0x3f717273
s_brev_b64 s[0:1], 0xaf123456
// GFX1200: s_brev_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_brev_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_brev_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_bcnt0_i32_b32 s0, s1
// GFX12: s_bcnt0_i32_b32 s0, s1 ; encoding: [0x01,0x16,0x80,0xbe]
@@ -1389,7 +1390,7 @@ s_bcnt0_i32_b64 s0, 0x3f717273
s_bcnt0_i32_b64 s0, 0xaf123456
// GFX1200: s_bcnt0_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_bcnt0_i32_b64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_bcnt0_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_bcnt1_i32_b32 s0, s1
// GFX12: s_bcnt1_i32_b32 s0, s1 ; encoding: [0x01,0x18,0x80,0xbe]
@@ -1501,7 +1502,7 @@ s_bcnt1_i32_b64 s0, 0x3f717273
s_bcnt1_i32_b64 s0, 0xaf123456
// GFX1200: s_bcnt1_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_bcnt1_i32_b64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_bcnt1_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_ff1_i32_b32 s0, s1
// GFX12: s_ctz_i32_b32 s0, s1 ; encoding: [0x01,0x08,0x80,0xbe]
@@ -1613,7 +1614,7 @@ s_ff1_i32_b64 s0, 0x3f717273
s_ff1_i32_b64 s0, 0xaf123456
// GFX1200: s_ctz_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_ctz_i32_b64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_ctz_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_flbit_i32_b32 s0, s1
// GFX12: s_clz_i32_u32 s0, s1 ; encoding: [0x01,0x0a,0x80,0xbe]
@@ -1725,7 +1726,7 @@ s_flbit_i32_b64 s0, 0x3f717273
s_flbit_i32_b64 s0, 0xaf123456
// GFX1200: s_clz_i32_u64 s0, 0xaf123456 ; encoding: [0xff,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_clz_i32_u64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_clz_i32_u64 s0, 0xaf123456 ; encoding: [0xfe,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_flbit_i32 s0, s1
// GFX12: s_cls_i32 s0, s1 ; encoding: [0x01,0x0c,0x80,0xbe]
@@ -1837,7 +1838,7 @@ s_flbit_i32_i64 s0, 0x3f717273
s_flbit_i32_i64 s0, 0xaf123456
// GFX1200: s_cls_i32_i64 s0, 0xaf123456 ; encoding: [0xff,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_cls_i32_i64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_cls_i32_i64 s0, 0xaf123456 ; encoding: [0xfe,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_sext_i32_i8 s0, s1
// GFX12: s_sext_i32_i8 s0, s1 ; encoding: [0x01,0x0e,0x80,0xbe]
@@ -2283,7 +2284,7 @@ s_and_saveexec_b64 s[0:1], 0x3f717273
s_and_saveexec_b64 s[0:1], 0xaf123456
// GFX1200: s_and_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_or_saveexec_b64 s[0:1], s[2:3]
// GFX12: s_or_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x23,0x80,0xbe]
@@ -2323,7 +2324,7 @@ s_or_saveexec_b64 s[0:1], 0x3f717273
s_or_saveexec_b64 s[0:1], 0xaf123456
// GFX1200: s_or_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_xor_saveexec_b64 s[0:1], s[2:3]
// GFX12: s_xor_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x25,0x80,0xbe]
@@ -2363,7 +2364,7 @@ s_xor_saveexec_b64 s[0:1], 0x3f717273
s_xor_saveexec_b64 s[0:1], 0xaf123456
// GFX1200: s_xor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_xor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_xor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_andn2_saveexec_b64 s[0:1], s[2:3]
// GFX12: s_and_not1_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x31,0x80,0xbe]
@@ -2403,7 +2404,7 @@ s_andn2_saveexec_b64 s[0:1], 0x3f717273
s_andn2_saveexec_b64 s[0:1], 0xaf123456
// GFX1200: s_and_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not1_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_orn2_saveexec_b64 s[0:1], s[2:3]
// GFX12: s_or_not1_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x33,0x80,0xbe]
@@ -2443,7 +2444,7 @@ s_orn2_saveexec_b64 s[0:1], 0x3f717273
s_orn2_saveexec_b64 s[0:1], 0xaf123456
// GFX1200: s_or_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_not1_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_nand_saveexec_b64 s[0:1], s[2:3]
// GFX12: s_nand_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x27,0x80,0xbe]
@@ -2483,7 +2484,7 @@ s_nand_saveexec_b64 s[0:1], 0x3f717273
s_nand_saveexec_b64 s[0:1], 0xaf123456
// GFX1200: s_nand_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_nand_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_nand_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_nor_saveexec_b64 s[0:1], s[2:3]
// GFX12: s_nor_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x29,0x80,0xbe]
@@ -2523,7 +2524,7 @@ s_nor_saveexec_b64 s[0:1], 0x3f717273
s_nor_saveexec_b64 s[0:1], 0xaf123456
// GFX1200: s_nor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_nor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_nor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_xnor_saveexec_b64 s[0:1], s[2:3]
// GFX12: s_xnor_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x2b,0x80,0xbe]
@@ -2563,7 +2564,7 @@ s_xnor_saveexec_b64 s[0:1], 0x3f717273
s_xnor_saveexec_b64 s[0:1], 0xaf123456
// GFX1200: s_xnor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_xnor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_xnor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_quadmask_b32 s0, s1
// GFX12: s_quadmask_b32 s0, s1 ; encoding: [0x01,0x1a,0x80,0xbe]
@@ -2666,7 +2667,7 @@ s_quadmask_b64 s[0:1], 0x3f717273
s_quadmask_b64 s[0:1], 0xaf123456
// GFX1200: s_quadmask_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_quadmask_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_quadmask_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_movrels_b32 s0, s1
// GFX12: s_movrels_b32 s0, s1 ; encoding: [0x01,0x40,0x80,0xbe]
@@ -2811,7 +2812,7 @@ s_movreld_b64 s[0:1], 0x3f717273
s_movreld_b64 s[0:1], 0xaf123456
// GFX1200: s_movreld_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_movreld_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_movreld_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_abs_i32 s0, s1
// GFX12: s_abs_i32 s0, s1 ; encoding: [0x01,0x15,0x80,0xbe]
@@ -2911,7 +2912,7 @@ s_andn1_saveexec_b64 s[0:1], 0x3f717273
s_andn1_saveexec_b64 s[0:1], 0xaf123456
// GFX1200: s_and_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not0_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_orn1_saveexec_b64 s[0:1], s[2:3]
// GFX12: s_or_not0_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x2f,0x80,0xbe]
@@ -2951,7 +2952,7 @@ s_orn1_saveexec_b64 s[0:1], 0x3f717273
s_orn1_saveexec_b64 s[0:1], 0xaf123456
// GFX1200: s_or_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_not0_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_andn1_wrexec_b64 s[0:1], s[2:3]
// GFX12: s_and_not0_wrexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x35,0x80,0xbe]
@@ -2991,7 +2992,7 @@ s_andn1_wrexec_b64 s[0:1], 0x3f717273
s_andn1_wrexec_b64 s[0:1], 0xaf123456
// GFX1200: s_and_not0_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not0_wrexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not0_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_andn2_wrexec_b64 s[0:1], s[2:3]
// GFX12: s_and_not1_wrexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x37,0x80,0xbe]
@@ -3031,7 +3032,7 @@ s_andn2_wrexec_b64 s[0:1], 0x3f717273
s_andn2_wrexec_b64 s[0:1], 0xaf123456
// GFX1200: s_and_not1_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not1_wrexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not1_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_bitreplicate_b64_b32 s[0:1], s2
// GFX12: s_bitreplicate_b64_b32 s[0:1], s2 ; encoding: [0x02,0x14,0x80,0xbe]
@@ -3830,7 +3831,7 @@ s_ctz_i32_b64 exec_hi, src_scc
s_ctz_i32_b64 null, 0xaf123456
// GFX1200: s_ctz_i32_b64 null, 0xaf123456 ; encoding: [0xff,0x09,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_ctz_i32_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x09,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_ctz_i32_b64 null, 0xaf123456 ; encoding: [0xfe,0x09,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_and_not1_saveexec_b64 s[10:11], s[2:3]
// GFX12: s_and_not1_saveexec_b64 s[10:11], s[2:3] ; encoding: [0x02,0x31,0x8a,0xbe]
@@ -3858,7 +3859,7 @@ s_and_not1_saveexec_b64 ttmp[14:15], src_scc
s_and_not1_saveexec_b64 null, 0xaf123456
// GFX1200: s_and_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xff,0x31,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not1_saveexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x31,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x31,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_and_not0_saveexec_b32 s5, s1
// GFX12: s_and_not0_saveexec_b32 s5, s1 ; encoding: [0x01,0x2c,0x85,0xbe]
@@ -3919,7 +3920,7 @@ s_and_not0_saveexec_b64 ttmp[14:15], src_scc
s_and_not0_saveexec_b64 null, 0xaf123456
// GFX1200: s_and_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xff,0x2d,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not0_saveexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x2d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x2d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_and_not0_wrexec_b32 s5, s1
// GFX12: s_and_not0_wrexec_b32 s5, s1 ; encoding: [0x01,0x34,0x85,0xbe]
@@ -3980,7 +3981,7 @@ s_and_not0_wrexec_b64 ttmp[14:15], src_scc
s_and_not0_wrexec_b64 null, 0xaf123456
// GFX1200: s_and_not0_wrexec_b64 null, 0xaf123456 ; encoding: [0xff,0x35,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not0_wrexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x35,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not0_wrexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x35,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_and_not1_saveexec_b32 s5, s1
// GFX12: s_and_not1_saveexec_b32 s5, s1 ; encoding: [0x01,0x30,0x85,0xbe]
@@ -4074,7 +4075,7 @@ s_and_not1_wrexec_b64 ttmp[14:15], src_scc
s_and_not1_wrexec_b64 null, 0xaf123456
// GFX1200: s_and_not1_wrexec_b64 null, 0xaf123456 ; encoding: [0xff,0x37,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not1_wrexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x37,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not1_wrexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x37,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_cls_i32 s5, s1
// GFX12: s_cls_i32 s5, s1 ; encoding: [0x01,0x0c,0x85,0xbe]
@@ -4144,7 +4145,7 @@ s_cls_i32_i64 exec_hi, src_scc
s_cls_i32_i64 null, 0xaf123456
// GFX1200: s_cls_i32_i64 null, 0xaf123456 ; encoding: [0xff,0x0d,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_cls_i32_i64 null, lit64(0xaf123456) ; encoding: [0xfe,0x0d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_cls_i32_i64 null, 0xaf123456 ; encoding: [0xfe,0x0d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_clz_i32_u32 s5, s1
// GFX12: s_clz_i32_u32 s5, s1 ; encoding: [0x01,0x0a,0x85,0xbe]
@@ -4214,7 +4215,7 @@ s_clz_i32_u64 exec_hi, src_scc
s_clz_i32_u64 null, 0xaf123456
// GFX1200: s_clz_i32_u64 null, 0xaf123456 ; encoding: [0xff,0x0b,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_clz_i32_u64 null, lit64(0xaf123456) ; encoding: [0xfe,0x0b,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_clz_i32_u64 null, 0xaf123456 ; encoding: [0xfe,0x0b,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_or_not0_saveexec_b32 s5, s1
// GFX12: s_or_not0_saveexec_b32 s5, s1 ; encoding: [0x01,0x2e,0x85,0xbe]
@@ -4275,7 +4276,7 @@ s_or_not0_saveexec_b64 ttmp[14:15], src_scc
s_or_not0_saveexec_b64 null, 0xaf123456
// GFX1200: s_or_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xff,0x2f,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_not0_saveexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x2f,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x2f,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_or_not1_saveexec_b32 s5, s1
// GFX12: s_or_not1_saveexec_b32 s5, s1 ; encoding: [0x01,0x32,0x85,0xbe]
@@ -4336,4 +4337,4 @@ s_or_not1_saveexec_b64 ttmp[14:15], src_scc
s_or_not1_saveexec_b64 null, 0xaf123456
// GFX1200: s_or_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xff,0x33,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_not1_saveexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x33,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x33,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s
index 2ecec4c..9c83879 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s
@@ -1,6 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefixes=GFX12,GFX1200 %s
// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefixes=GFX12,GFX1250 %s
+// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s
s_add_nc_u64 s[0:1], s[2:3], s[4:5]
// GFX12: s_add_nc_u64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0xa9]
@@ -55,7 +56,7 @@ s_add_nc_u64 s[0:1], 0x3f717273, s[2:3]
s_add_nc_u64 s[0:1], 0xaf123456, s[2:3]
// GFX1200: s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf]
-// GFX1250: s_add_nc_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_add_nc_u64 s[0:1], s[2:3], exec
// GFX12: s_add_nc_u64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0xa9]
@@ -80,7 +81,7 @@ s_add_nc_u64 s[0:1], s[2:3], 0x3f717273
s_add_nc_u64 s[0:1], s[2:3], 0xaf123456
// GFX1200: s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0xa9,0x56,0x34,0x12,0xaf]
-// GFX1250: s_add_nc_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_sub_nc_u64 s[0:1], s[2:3], s[4:5]
// GFX12: s_sub_nc_u64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x00,0xaa]
@@ -135,7 +136,7 @@ s_sub_nc_u64 s[0:1], 0x3f717273, s[2:3]
s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3]
// GFX1200: s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf]
-// GFX1250: s_sub_nc_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_sub_nc_u64 s[0:1], s[2:3], exec
// GFX12: s_sub_nc_u64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x00,0xaa]
@@ -160,7 +161,7 @@ s_sub_nc_u64 s[0:1], s[2:3], 0x3f717273
s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456
// GFX1200: s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x00,0xaa,0x56,0x34,0x12,0xaf]
-// GFX1250: s_sub_nc_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_mul_u64 s[0:1], s[2:3], s[4:5]
// GFX12: s_mul_u64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0xaa]
@@ -215,7 +216,7 @@ s_mul_u64 s[0:1], 0x3f717273, s[2:3]
s_mul_u64 s[0:1], 0xaf123456, s[2:3]
// GFX1200: s_mul_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf]
-// GFX1250: s_mul_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_mul_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_mul_u64 s[0:1], s[2:3], exec
// GFX12: s_mul_u64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0xaa]
@@ -240,7 +241,7 @@ s_mul_u64 s[0:1], s[2:3], 0x3f717273
s_mul_u64 s[0:1], s[2:3], 0xaf123456
// GFX1200: s_mul_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0xaa,0x56,0x34,0x12,0xaf]
-// GFX1250: s_mul_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_mul_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_add_f32 s5, s1, s2
// GFX12: s_add_f32 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0xa0]
@@ -2358,7 +2359,7 @@ s_cselect_b64 s[0:1], 0x3f717273, s[4:5]
s_cselect_b64 s[0:1], 0xaf123456, s[4:5]
// GFX1200: s_cselect_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x98,0x56,0x34,0x12,0xaf]
-// GFX1250: s_cselect_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_cselect_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_cselect_b64 s[0:1], s[2:3], exec
// GFX12: s_cselect_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x98]
@@ -2383,7 +2384,7 @@ s_cselect_b64 s[0:1], s[2:3], 0x3f717273
s_cselect_b64 s[0:1], s[2:3], 0xaf123456
// GFX1200: s_cselect_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x98,0x56,0x34,0x12,0xaf]
-// GFX1250: s_cselect_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_cselect_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_and_b32 s0, s1, s2
// GFX12: s_and_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8b]
@@ -2552,7 +2553,7 @@ s_and_b64 s[0:1], 0x3f717273, s[4:5]
s_and_b64 s[0:1], 0xaf123456, s[4:5]
// GFX1200: s_and_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_and_b64 s[0:1], s[2:3], exec
// GFX12: s_and_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8b]
@@ -2577,7 +2578,7 @@ s_and_b64 s[0:1], s[2:3], 0x3f717273
s_and_b64 s[0:1], s[2:3], 0xaf123456
// GFX1200: s_and_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8b,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_or_b32 s0, s1, s2
// GFX12: s_or_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8c]
@@ -2737,7 +2738,7 @@ s_or_b64 s[0:1], 0x3f717273, s[4:5]
s_or_b64 s[0:1], 0xaf123456, s[4:5]
// GFX1200: s_or_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_or_b64 s[0:1], s[2:3], exec
// GFX12: s_or_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8c]
@@ -2762,7 +2763,7 @@ s_or_b64 s[0:1], s[2:3], 0x3f717273
s_or_b64 s[0:1], s[2:3], 0xaf123456
// GFX1200: s_or_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8c,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_xor_b32 s0, s1, s2
// GFX12: s_xor_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8d]
@@ -2922,7 +2923,7 @@ s_xor_b64 s[0:1], 0x3f717273, s[4:5]
s_xor_b64 s[0:1], 0xaf123456, s[4:5]
// GFX1200: s_xor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf]
-// GFX1250: s_xor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_xor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_xor_b64 s[0:1], s[2:3], exec
// GFX12: s_xor_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8d]
@@ -2947,7 +2948,7 @@ s_xor_b64 s[0:1], s[2:3], 0x3f717273
s_xor_b64 s[0:1], s[2:3], 0xaf123456
// GFX1200: s_xor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8d,0x56,0x34,0x12,0xaf]
-// GFX1250: s_xor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_xor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_andn2_b32 s0, s1, s2
// GFX12: s_and_not1_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x91]
@@ -3107,7 +3108,7 @@ s_andn2_b64 s[0:1], 0x3f717273, s[4:5]
s_andn2_b64 s[0:1], 0xaf123456, s[4:5]
// GFX1200: s_and_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x91,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not1_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_andn2_b64 s[0:1], s[2:3], exec
// GFX12: s_and_not1_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x91]
@@ -3132,7 +3133,7 @@ s_andn2_b64 s[0:1], s[2:3], 0x3f717273
s_andn2_b64 s[0:1], s[2:3], 0xaf123456
// GFX1200: s_and_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x91,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not1_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_orn2_b32 s0, s1, s2
// GFX12: s_or_not1_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x92]
@@ -3292,7 +3293,7 @@ s_orn2_b64 s[0:1], 0x3f717273, s[4:5]
s_orn2_b64 s[0:1], 0xaf123456, s[4:5]
// GFX1200: s_or_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x92,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_not1_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_orn2_b64 s[0:1], s[2:3], exec
// GFX12: s_or_not1_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x92]
@@ -3317,7 +3318,7 @@ s_orn2_b64 s[0:1], s[2:3], 0x3f717273
s_orn2_b64 s[0:1], s[2:3], 0xaf123456
// GFX1200: s_or_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x92,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_not1_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_nand_b32 s0, s1, s2
// GFX12: s_nand_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8e]
@@ -3477,7 +3478,7 @@ s_nand_b64 s[0:1], 0x3f717273, s[4:5]
s_nand_b64 s[0:1], 0xaf123456, s[4:5]
// GFX1200: s_nand_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf]
-// GFX1250: s_nand_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_nand_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_nand_b64 s[0:1], s[2:3], exec
// GFX12: s_nand_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8e]
@@ -3502,7 +3503,7 @@ s_nand_b64 s[0:1], s[2:3], 0x3f717273
s_nand_b64 s[0:1], s[2:3], 0xaf123456
// GFX1200: s_nand_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8e,0x56,0x34,0x12,0xaf]
-// GFX1250: s_nand_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_nand_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_nor_b32 s0, s1, s2
// GFX12: s_nor_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8f]
@@ -3662,7 +3663,7 @@ s_nor_b64 s[0:1], 0x3f717273, s[4:5]
s_nor_b64 s[0:1], 0xaf123456, s[4:5]
// GFX1200: s_nor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf]
-// GFX1250: s_nor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_nor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_nor_b64 s[0:1], s[2:3], exec
// GFX12: s_nor_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8f]
@@ -3687,7 +3688,7 @@ s_nor_b64 s[0:1], s[2:3], 0x3f717273
s_nor_b64 s[0:1], s[2:3], 0xaf123456
// GFX1200: s_nor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8f,0x56,0x34,0x12,0xaf]
-// GFX1250: s_nor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_nor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_xnor_b32 s0, s1, s2
// GFX12: s_xnor_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x90]
@@ -3847,7 +3848,7 @@ s_xnor_b64 s[0:1], 0x3f717273, s[4:5]
s_xnor_b64 s[0:1], 0xaf123456, s[4:5]
// GFX1200: s_xnor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x90,0x56,0x34,0x12,0xaf]
-// GFX1250: s_xnor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_xnor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_xnor_b64 s[0:1], s[2:3], exec
// GFX12: s_xnor_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x90]
@@ -3872,7 +3873,7 @@ s_xnor_b64 s[0:1], s[2:3], 0x3f717273
s_xnor_b64 s[0:1], s[2:3], 0xaf123456
// GFX1200: s_xnor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x90,0x56,0x34,0x12,0xaf]
-// GFX1250: s_xnor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_xnor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_lshl_b32 s0, s1, s2
// GFX12: s_lshl_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x84]
@@ -4032,7 +4033,7 @@ s_lshl_b64 s[0:1], 0x3f717273, s4
s_lshl_b64 s[0:1], 0xaf123456, s4
// GFX1200: s_lshl_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x84,0x56,0x34,0x12,0xaf]
-// GFX1250: s_lshl_b64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x84,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_lshl_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x84,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_lshl_b64 s[0:1], s[2:3], exec_lo
// GFX12: s_lshl_b64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x80,0x84]
@@ -4216,7 +4217,7 @@ s_lshr_b64 s[0:1], 0x3f717273, s4
s_lshr_b64 s[0:1], 0xaf123456, s4
// GFX1200: s_lshr_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x85,0x56,0x34,0x12,0xaf]
-// GFX1250: s_lshr_b64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x85,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_lshr_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x85,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_lshr_b64 s[0:1], s[2:3], exec_lo
// GFX12: s_lshr_b64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x80,0x85]
@@ -4400,7 +4401,7 @@ s_ashr_i64 s[0:1], 0x3f717273, s4
s_ashr_i64 s[0:1], 0xaf123456, s4
// GFX1200: s_ashr_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x86,0x56,0x34,0x12,0xaf]
-// GFX1250: s_ashr_i64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x86,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_ashr_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x86,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_ashr_i64 s[0:1], s[2:3], exec_lo
// GFX12: s_ashr_i64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x80,0x86]
@@ -4995,7 +4996,7 @@ s_bfe_u64 s[0:1], 0x3f717273, s4
s_bfe_u64 s[0:1], 0xaf123456, s4
// GFX1200: s_bfe_u64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x00,0x94,0x56,0x34,0x12,0xaf]
-// GFX1250: s_bfe_u64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x00,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_bfe_u64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x00,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_bfe_u64 s[0:1], s[2:3], exec_lo
// GFX12: s_bfe_u64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x00,0x94]
@@ -5074,7 +5075,7 @@ s_bfe_i64 s[0:1], 0x3f717273, s4
s_bfe_i64 s[0:1], 0xaf123456, s4
// GFX1200: s_bfe_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x94,0x56,0x34,0x12,0xaf]
-// GFX1250: s_bfe_i64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_bfe_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_bfe_i64 s[0:1], s[2:3], exec_lo
// GFX12: s_bfe_i64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x80,0x94]
@@ -6278,7 +6279,7 @@ s_and_not1_b64 s[10:11], vcc, ttmp[14:15]
s_and_not1_b64 s[10:11], ttmp[14:15], 0xaf123456
// GFX1200: s_and_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xff,0x8a,0x91,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not1_b64 s[10:11], ttmp[14:15], lit64(0xaf123456) ; encoding: [0x7a,0xfe,0x8a,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xfe,0x8a,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_and_not1_b64 s[10:11], exec, src_scc
// GFX12: s_and_not1_b64 s[10:11], exec, src_scc ; encoding: [0x7e,0xfd,0x8a,0x91]
@@ -6297,7 +6298,7 @@ s_and_not1_b64 exec, src_scc, exec
s_and_not1_b64 null, 0xaf123456, vcc
// GFX1200: s_and_not1_b64 null, 0xaf123456, vcc ; encoding: [0xff,0x6a,0xfc,0x91,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not1_b64 null, lit64(0xaf123456), vcc ; encoding: [0xfe,0x6a,0xfc,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not1_b64 null, 0xaf123456, vcc ; encoding: [0xfe,0x6a,0xfc,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_or_not1_b64 s[10:11], s[2:3], s[4:5]
// GFX12: s_or_not1_b64 s[10:11], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x8a,0x92]
@@ -6310,7 +6311,7 @@ s_or_not1_b64 s[10:11], vcc, ttmp[14:15]
s_or_not1_b64 s[10:11], ttmp[14:15], 0xaf123456
// GFX1200: s_or_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xff,0x8a,0x92,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_not1_b64 s[10:11], ttmp[14:15], lit64(0xaf123456) ; encoding: [0x7a,0xfe,0x8a,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xfe,0x8a,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_or_not1_b64 s[10:11], exec, src_scc
// GFX12: s_or_not1_b64 s[10:11], exec, src_scc ; encoding: [0x7e,0xfd,0x8a,0x92]
@@ -6329,4 +6330,4 @@ s_or_not1_b64 exec, src_scc, exec
s_or_not1_b64 null, 0xaf123456, vcc
// GFX1200: s_or_not1_b64 null, 0xaf123456, vcc ; encoding: [0xff,0x6a,0xfc,0x92,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_not1_b64 null, lit64(0xaf123456), vcc ; encoding: [0xfe,0x6a,0xfc,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_not1_b64 null, 0xaf123456, vcc ; encoding: [0xfe,0x6a,0xfc,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sopc.s b/llvm/test/MC/AMDGPU/gfx12_asm_sopc.s
index cedba66d..98bb3c3 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sopc.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sopc.s
@@ -1,6 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck --check-prefixes=GFX12,GFX1200 %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX12,GFX1250 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s
s_cmp_lt_f32 s1, s2
// GFX12: s_cmp_lt_f32 s1, s2 ; encoding: [0x01,0x02,0x41,0xbf]
@@ -2119,7 +2120,7 @@ s_cmp_eq_u64 s[0:1], 0x3f717273
s_cmp_eq_u64 s[0:1], 0xaf123456
// GFX1200: s_cmp_eq_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xff,0x10,0xbf,0x56,0x34,0x12,0xaf]
-// GFX1250: s_cmp_eq_u64 s[0:1], lit64(0xaf123456) ; encoding: [0x00,0xfe,0x10,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_cmp_eq_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xfe,0x10,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
s_cmp_lg_u64 s[0:1], s[2:3]
// GFX12: s_cmp_lg_u64 s[0:1], s[2:3] ; encoding: [0x00,0x02,0x11,0xbf]
@@ -2162,4 +2163,4 @@ s_cmp_lg_u64 s[0:1], 0x3f717273
s_cmp_lg_u64 s[0:1], 0xaf123456
// GFX1200: s_cmp_lg_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xff,0x11,0xbf,0x56,0x34,0x12,0xaf]
-// GFX1250: s_cmp_lg_u64 s[0:1], lit64(0xaf123456) ; encoding: [0x00,0xfe,0x11,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_cmp_lg_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xfe,0x11,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s
index 7839475..78aa8f2 100644
--- a/llvm/test/MC/AMDGPU/literals.s
+++ b/llvm/test/MC/AMDGPU/literals.s
@@ -1,935 +1,1849 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5
// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI
// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=SICI,CI
-// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GFX89
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX89,GFX9
-
-// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSI,NOSICI,NOSICIVI --implicit-check-not=error:
-// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOCIVI,NOSICIVI --implicit-check-not=error:
-// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICIVI,NOVI,NOGFX89 --implicit-check-not=error:
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX89,NOGFX9 --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX89
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX89,GFX9
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX11
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX12XX,GFX12
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX12XX,GFX1250
+
+// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOSI --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOCI --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX8PLUS,NOGFX89,NOVI --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX8PLUS,NOGFX89,NOGFX9 --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX8PLUS,NOGFX11 --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX8PLUS,NOGFX12 --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 %s -mattr=+real-true16 -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX8PLUS,NOGFX1250 --implicit-check-not=error:
//---------------------------------------------------------------------------//
// fp literal, expected fp operand
//---------------------------------------------------------------------------//
-// SICI: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x64,0x00,0x7e]
v_fract_f64 v[0:1], 0.5
+// SICI: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e]
-// SICI: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x68,0x00,0x7e]
-// GFX89: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x50,0x00,0x7e]
v_sqrt_f64 v[0:1], -4.0
+// SICI: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x68,0x00,0x7e]
+// GFX89: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x50,0x00,0x7e]
+// GFX12XX: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x68,0x00,0x7e]
+// GFX11: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x68,0x00,0x7e]
-// SICI: v_log_clamp_f32_e32 v1, 0.5 ; encoding: [0xf0,0x4c,0x02,0x7e]
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_log_clamp_f32 v1, 0.5
+// NOGFX8PLUS: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// SICI: v_log_clamp_f32_e32 v1, 0.5 ; encoding: [0xf0,0x4c,0x02,0x7e]
-// SICI: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x64,0x00,0x7e]
-v_fract_f64 v[0:1], 0.5
-
-// SICI: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x38,0x00,0x7e]
v_trunc_f32 v0, 0.5
+// SICI: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x42,0x00,0x7e]
-// SICI: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x64,0x00,0x7e]
v_fract_f64 v[0:1], -1.0
+// SICI: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x7c,0x00,0x7e]
-// SICI: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x38,0x00,0x7e]
v_trunc_f32 v0, -1.0
+// SICI: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x42,0x00,0x7e]
-// SICI: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x64,0x00,0x7e]
v_fract_f64 v[0:1], 4.0
+// SICI: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x7c,0x00,0x7e]
-// SICI: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x38,0x00,0x7e]
v_trunc_f32 v0, 4.0
+// SICI: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x42,0x00,0x7e]
-// SICI: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x64,0x00,0x7e]
v_fract_f64 v[0:1], 0.0
+// SICI: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e]
-// SICI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x38,0x00,0x7e]
v_trunc_f32 v0, 0.0
+// SICI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e]
-// SICI: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f]
-// GFX89: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x64,0x00,0x7e,0x00,0x00,0xf8,0x3f]
v_fract_f64 v[0:1], 1.5
+// SICI: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f]
+// GFX89: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x64,0x00,0x7e,0x00,0x00,0xf8,0x3f]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f]
+// GFX11: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f]
-// SICI: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f]
-// GFX89: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x00,0xc0,0x3f]
v_trunc_f32 v0, 1.5
+// SICI: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f]
+// GFX89: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x00,0xc0,0x3f]
+// GFX12XX: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f]
+// GFX11: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f]
-// SICI: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0]
-// GFX89: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x64,0x00,0x7e,0xca,0x21,0x09,0xc0]
v_fract_f64 v[0:1], -3.1415
+// SICI: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0]
+// GFX89: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x64,0x00,0x7e,0xca,0x21,0x09,0xc0]
+// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// GFX11: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0]
+// GFX12: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0]
+// GFX1250: v_fract_f64_e32 v[0:1], 0xc00921cac083126f ; encoding: [0xfe,0x7c,0x00,0x7e,0x6f,0x12,0x83,0xc0,0xca,0x21,0x09,0xc0]
+// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
-// SICI: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0]
-// GFX89: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x38,0x00,0x7e,0x56,0x0e,0x49,0xc0]
v_trunc_f32 v0, -3.1415
+// SICI: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0]
+// GFX89: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x38,0x00,0x7e,0x56,0x0e,0x49,0xc0]
+// GFX12XX: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0]
+// GFX11: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0]
-// SICI: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44]
-// GFX89: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x64,0x00,0x7e,0x02,0x2d,0xb5,0x44]
v_fract_f64 v[0:1], 100000000000000000000000.0
+// SICI: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44]
+// GFX89: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x64,0x00,0x7e,0x02,0x2d,0xb5,0x44]
+// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// GFX11: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44]
+// GFX12: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44]
+// GFX1250: v_fract_f64_e32 v[0:1], 0x44b52d02c7e14af6 ; encoding: [0xfe,0x7c,0x00,0x7e,0xf6,0x4a,0xe1,0xc7,0x02,0x2d,0xb5,0x44]
+// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
-// SICI: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65]
-// GFX89: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x38,0x00,0x7e,0x16,0x68,0xa9,0x65]
v_trunc_f32 v0, 100000000000000000000000.0
+// SICI: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65]
+// GFX89: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x38,0x00,0x7e,0x16,0x68,0xa9,0x65]
+// GFX12XX: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65]
+// GFX11: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65]
-// SICI: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41]
-// GFX89: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x64,0x00,0x7e,0xd0,0x12,0x63,0x41]
v_fract_f64 v[0:1], 10000000.0
+// SICI: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41]
+// GFX89: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x64,0x00,0x7e,0xd0,0x12,0x63,0x41]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41]
+// GFX11: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41]
-// SICI: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b]
-// GFX89: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x38,0x00,0x7e,0x80,0x96,0x18,0x4b]
v_trunc_f32 v0, 10000000.0
+// SICI: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b]
+// GFX89: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x38,0x00,0x7e,0x80,0x96,0x18,0x4b]
+// GFX12XX: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b]
+// GFX11: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b]
-// SICI: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47]
-// GFX89: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xef,0x47]
v_fract_f64 v[0:1], 3.402823e+38
+// SICI: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47]
+// GFX89: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xef,0x47]
+// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// GFX11: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47]
+// GFX12: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47]
+// GFX1250: v_fract_f64_e32 v[0:1], 0x47efffff966ad924 ; encoding: [0xfe,0x7c,0x00,0x7e,0x24,0xd9,0x6a,0x96,0xff,0xff,0xef,0x47]
+// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
-// SICI: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f]
-// GFX89: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x38,0x00,0x7e,0xfd,0xff,0x7f,0x7f]
v_trunc_f32 v0, 3.402823e+38
+// SICI: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f]
+// GFX89: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x38,0x00,0x7e,0xfd,0xff,0x7f,0x7f]
+// GFX12XX: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f]
+// GFX11: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f]
-// SICI: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38]
-// GFX89: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0x1f,0x38]
v_fract_f64 v[0:1], 2.3509886e-38
+// SICI: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38]
+// GFX89: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0x1f,0x38]
+// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// GFX11: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38]
+// GFX12: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38]
+// GFX1250: v_fract_f64_e32 v[0:1], 0x381fffffe8c9d9fb ; encoding: [0xfe,0x7c,0x00,0x7e,0xfb,0xd9,0xc9,0xe8,0xff,0xff,0x1f,0x38]
+// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
-// SICI: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00]
-// GFX89: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x38,0x00,0x7e,0xff,0xff,0xff,0x00]
v_trunc_f32 v0, 2.3509886e-38
+// SICI: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00]
+// GFX89: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x38,0x00,0x7e,0xff,0xff,0xff,0x00]
+// GFX12XX: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00]
+// GFX11: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00]
-// SICI: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31]
-// GFX89: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x64,0x00,0x7e,0x23,0xf6,0x79,0x31]
v_fract_f64 v[0:1], 2.3509886e-70
+// SICI: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31]
+// GFX89: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x64,0x00,0x7e,0x23,0xf6,0x79,0x31]
+// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// GFX11: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31]
+// GFX12: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31]
+// GFX1250: v_fract_f64_e32 v[0:1], 0x3179f623c2d3cf3c ; encoding: [0xfe,0x7c,0x00,0x7e,0x3c,0xcf,0xd3,0xc2,0x23,0xf6,0x79,0x31]
+// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_trunc_f32 v0, 2.3509886e-70
+// NOGCN: :[[@LINE-1]]:17: error: invalid operand for instruction
+
+v_fract_f64_e32 v[0:1], 1.0
+// SICI: v_fract_f64_e32 v[0:1], 1.0 ; encoding: [0xf2,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], 1.0 ; encoding: [0xf2,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 1.0 ; encoding: [0xf2,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], 1.0 ; encoding: [0xf2,0x7c,0x00,0x7e]
+
+v_fract_f64_e32 v[0:1], lit(1.0)
+// SICI: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f]
+// GFX89: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x64,0x00,0x7e,0x00,0x00,0xf0,0x3f]
+// GFX11: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f]
+// GFX12: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f]
+// GFX1250: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xfe,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f,0x00,0x00,0x00,0x00]
+
+v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1.0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1.0 ; encoding: [0x08,0x40,0x44,0xcc,0x00,0x09,0xca,0x1b]
+// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], lit(1.0)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-3]]:54: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-4]]:54: error: invalid operand for instruction
+// NOGFX1250: :[[@LINE-5]]:54: error: invalid operand for instruction
+
+v_cos_f16_e32 v5.l, 1.0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+// GFX11: v_cos_f16_e32 v5.l, 1.0 ; encoding: [0xf2,0xc2,0x0a,0x7e]
+// GFX1250: v_cos_f16_e32 v5.l, 1.0 ; encoding: [0xf2,0xc2,0x0a,0x7e]
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_cos_f16_e32 v5.l, lit(1.0)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+// GFX11: v_cos_f16_e32 v5.l, lit(0x3c00) ; encoding: [0xff,0xc2,0x0a,0x7e,0x00,0x3c,0x00,0x00]
+// GFX1250: v_cos_f16_e32 v5.l, lit(0x3c00) ; encoding: [0xff,0xc2,0x0a,0x7e,0x00,0x3c,0x00,0x00]
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_tanh_bf16 v5, 1.0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_tanh_bf16_e32 v5, 1.0 ; encoding: [0xf2,0x94,0x0a,0x7e]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_tanh_bf16 v5, lit(1.0)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_tanh_bf16_e32 v5, lit(0x3f80) ; encoding: [0xff,0x94,0x0a,0x7e,0x80,0x3f,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_trunc_f32_e32 v0, 1.0
+// SICI: v_trunc_f32_e32 v0, 1.0 ; encoding: [0xf2,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, 1.0 ; encoding: [0xf2,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 1.0 ; encoding: [0xf2,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 1.0 ; encoding: [0xf2,0x42,0x00,0x7e]
+
+v_trunc_f32_e32 v0, lit(1.0)
+// SICI: v_trunc_f32_e32 v0, lit(0x3f800000) ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0x80,0x3f]
+// GFX89: v_trunc_f32_e32 v0, lit(0x3f800000) ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x00,0x80,0x3f]
+// GFX12XX: v_trunc_f32_e32 v0, lit(0x3f800000) ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0x80,0x3f]
+// GFX11: v_trunc_f32_e32 v0, lit(0x3f800000) ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0x80,0x3f]
+
+v_dot2_bf16_bf16 v5.l, v1, v2, 1.0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, 1.0 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03]
+// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_dot2_bf16_bf16 v5.l, v1, v2, lit(1.0)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, lit(0x3f80) ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0x80,0x3f,0x00,0x00]
+// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_dot2_f32_f16 v5, v1, 1.0, v2
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_dot2_f32_f16 v5, v1, 1.0, v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xe5,0x09,0x1c]
+// GFX12: v_dot2_f32_f16 v5, v1, 1.0, v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xe5,0x09,0x1c]
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_dot2_f32_f16 v5, v1, lit(1.0), v2
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_dot2_f32_f16 v5, v1, lit(0x3c00), v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xff,0x09,0x1c,0x00,0x3c,0x00,0x00]
+// GFX12: v_dot2_f32_f16 v5, v1, lit(0x3c00), v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xff,0x09,0x1c,0x00,0x3c,0x00,0x00]
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_cvt_pk_fp8_f16 v1.l, 1.0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3c00 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x3c,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_cvt_pk_fp8_f16 v1.l, lit(1.0)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_cvt_pk_fp8_f16 v1.l, lit(0x3c00) ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x3c,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
//---------------------------------------------------------------------------//
// fp literal, expected int operand
//---------------------------------------------------------------------------//
-// SICI: s_mov_b64 s[0:1], 0.5 ; encoding: [0xf0,0x04,0x80,0xbe]
-// GFX89: s_mov_b64 s[0:1], 0.5 ; encoding: [0xf0,0x01,0x80,0xbe]
s_mov_b64_e32 s[0:1], 0.5
+// GFX8PLUS: s_mov_b64 s[0:1], 0.5 ; encoding: [0xf0,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], 0.5 ; encoding: [0xf0,0x04,0x80,0xbe]
+
+s_mov_b64 s[0:1], lit(0.5)
+// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction
-// SICI: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x26]
v_and_b32_e32 v0, 0.5, v1
+// SICI: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x36]
-// SICI: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf0,0x02,0x02,0x00]
-// GFX89: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf0,0x02,0x02,0x00]
v_and_b32_e64 v0, 0.5, v1
+// SICI: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf0,0x02,0x02,0x00]
+// GFX89: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf0,0x02,0x02,0x00]
+// GFX12XX: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf0,0x02,0x02,0x00]
+// GFX11: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf0,0x02,0x02,0x00]
-// SICI: s_mov_b64 s[0:1], -1.0 ; encoding: [0xf3,0x04,0x80,0xbe]
-// GFX89: s_mov_b64 s[0:1], -1.0 ; encoding: [0xf3,0x01,0x80,0xbe]
s_mov_b64_e32 s[0:1], -1.0
+// GFX8PLUS: s_mov_b64 s[0:1], -1.0 ; encoding: [0xf3,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], -1.0 ; encoding: [0xf3,0x04,0x80,0xbe]
-// SICI: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x26]
v_and_b32_e32 v0, -1.0, v1
+// SICI: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x36]
-// SICI: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf3,0x02,0x02,0x00]
-// GFX89: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf3,0x02,0x02,0x00]
v_and_b32_e64 v0, -1.0, v1
+// SICI: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf3,0x02,0x02,0x00]
+// GFX89: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf3,0x02,0x02,0x00]
+// GFX12XX: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf3,0x02,0x02,0x00]
+// GFX11: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf3,0x02,0x02,0x00]
-// SICI: s_mov_b64 s[0:1], 4.0 ; encoding: [0xf6,0x04,0x80,0xbe]
-// GFX89: s_mov_b64 s[0:1], 4.0 ; encoding: [0xf6,0x01,0x80,0xbe]
s_mov_b64_e32 s[0:1], 4.0
+// GFX8PLUS: s_mov_b64 s[0:1], 4.0 ; encoding: [0xf6,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], 4.0 ; encoding: [0xf6,0x04,0x80,0xbe]
-// SICI: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x26]
v_and_b32_e32 v0, 4.0, v1
+// SICI: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x36]
-// SICI: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf6,0x02,0x02,0x00]
-// GFX89: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf6,0x02,0x02,0x00]
v_and_b32_e64 v0, 4.0, v1
+// SICI: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf6,0x02,0x02,0x00]
+// GFX89: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf6,0x02,0x02,0x00]
+// GFX12XX: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf6,0x02,0x02,0x00]
+// GFX11: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf6,0x02,0x02,0x00]
-// SICI: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x04,0x80,0xbe]
-// GFX89: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x01,0x80,0xbe]
s_mov_b64_e32 s[0:1], 0.0
+// GFX8PLUS: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x04,0x80,0xbe]
-// SICI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x26]
v_and_b32_e32 v0, 0.0, v1
+// SICI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36]
-// SICI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00]
-// GFX89: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00]
v_and_b32_e64 v0, 0.0, v1
+// SICI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00]
+// GFX89: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00]
+// GFX12XX: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00]
+// GFX11: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
s_mov_b64_e32 s[0:1], 1.5
+// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction
-// SICI: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f]
-// GFX89: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x26,0x00,0x00,0xc0,0x3f]
v_and_b32_e32 v0, 1.5, v1
+// SICI: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f]
+// GFX89: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x26,0x00,0x00,0xc0,0x3f]
+// GFX12XX: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f]
+// GFX11: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
s_mov_b64_e32 s[0:1], -3.1415
+// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction
-// SICI: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0]
-// GFX89: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x26,0x56,0x0e,0x49,0xc0]
v_and_b32_e32 v0, -3.1415, v1
+// SICI: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0]
+// GFX89: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x26,0x56,0x0e,0x49,0xc0]
+// GFX12XX: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0]
+// GFX11: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
s_mov_b64_e32 s[0:1], 100000000000000000000000.0
+// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction
-// SICI: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65]
-// GFX89: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x26,0x16,0x68,0xa9,0x65]
v_and_b32_e32 v0, 100000000000000000000000.0, v1
+// SICI: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65]
+// GFX89: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x26,0x16,0x68,0xa9,0x65]
+// GFX12XX: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65]
+// GFX11: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
s_mov_b64_e32 s[0:1], 10000000.0
+// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction
-// SICI: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b]
-// GFX89: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x26,0x80,0x96,0x18,0x4b]
v_and_b32_e32 v0, 10000000.0, v1
+// SICI: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b]
+// GFX89: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x26,0x80,0x96,0x18,0x4b]
+// GFX12XX: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b]
+// GFX11: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
s_mov_b64_e32 s[0:1], 3.402823e+38
+// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction
-// SICI: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f]
-// GFX89: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x26,0xfd,0xff,0x7f,0x7f]
v_and_b32_e32 v0, 3.402823e+38, v1
+// SICI: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f]
+// GFX89: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x26,0xfd,0xff,0x7f,0x7f]
+// GFX12XX: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f]
+// GFX11: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
s_mov_b64_e32 s[0:1], 2.3509886e-38
+// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction
-// SICI: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00]
-// GFX89: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x26,0xff,0xff,0xff,0x00]
v_and_b32_e32 v0, 2.3509886e-38, v1
+// SICI: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00]
+// GFX89: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x26,0xff,0xff,0xff,0x00]
+// GFX12XX: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00]
+// GFX11: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
s_mov_b64_e32 s[0:1], 2.3509886e-70
+// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_and_b32_e32 v0, 2.3509886e-70, v1
+// NOGCN: :[[@LINE-1]]:19: error: invalid operand for instruction
+
+v_not_b16 v5.l, 1.0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_not_b16_e32 v5.l, 1.0 ; encoding: [0xf2,0xd2,0x0a,0x7e]
+// GFX1250: v_not_b16_e32 v5.l, 1.0 ; encoding: [0xf2,0xd2,0x0a,0x7e]
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_not_b16 v5.l, lit(1.0)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_not_b16_e32 v5.l, lit(0x3f800000) ; encoding: [0xff,0xd2,0x0a,0x7e,0x00,0x00,0x80,0x3f]
+// GFX1250: v_not_b16_e32 v5.l, lit(0x3f800000) ; encoding: [0xff,0xd2,0x0a,0x7e,0x00,0x00,0x80,0x3f]
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_and_b32_e32 v0, 1.0, v1
+// SICI: v_and_b32_e32 v0, 1.0, v1 ; encoding: [0xf2,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, 1.0, v1 ; encoding: [0xf2,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, 1.0, v1 ; encoding: [0xf2,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, 1.0, v1 ; encoding: [0xf2,0x02,0x00,0x36]
+
+v_and_b32_e32 v0, lit(1.0), v1
+// SICI: v_and_b32_e32 v0, lit(0x3f800000), v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0x80,0x3f]
+// GFX89: v_and_b32_e32 v0, lit(0x3f800000), v1 ; encoding: [0xff,0x02,0x00,0x26,0x00,0x00,0x80,0x3f]
+// GFX12XX: v_and_b32_e32 v0, lit(0x3f800000), v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0x80,0x3f]
+// GFX11: v_and_b32_e32 v0, lit(0x3f800000), v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0x80,0x3f]
+
+v_pk_add_u16 v5, exec_lo, 1.0
+// GFX12XX: v_pk_add_u16 v5, exec_lo, 1.0 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xe4,0x01,0x18]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_pk_add_u16 v5, exec_lo, 1.0 ; encoding: [0x05,0x40,0x8a,0xd3,0x7e,0xe4,0x01,0x18]
+// GFX11: v_pk_add_u16 v5, exec_lo, 1.0 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xe4,0x01,0x18]
+// NOVI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_pk_add_u16 v5, exec_lo, lit(1.0)
+// GFX12XX: v_pk_add_u16 v5, exec_lo, lit(0x3f800000) ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x00,0x00,0x80,0x3f]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_pk_add_u16 v5, exec_lo, lit(0x3f800000) ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x00,0x00,0x80,0x3f]
+// NOVI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX9: :[[@LINE-5]]:31: error: invalid operand (violates constant bus restrictions)
+
+v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 1.0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 1.0 ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0xca,0x03]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], lit(1.0)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], lit(0x3f800000) ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0xfe,0x03,0x00,0x00,0x80,0x3f]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
//---------------------------------------------------------------------------//
// int literal, expected fp operand
//---------------------------------------------------------------------------//
-// SICI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x38,0x00,0x7e]
v_trunc_f32_e32 v0, 0
+// SICI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e]
+
+v_fract_f64_e32 v[0:1], 1
+// SICI: v_fract_f64_e32 v[0:1], 1 ; encoding: [0x81,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], 1 ; encoding: [0x81,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 1 ; encoding: [0x81,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], 1 ; encoding: [0x81,0x7c,0x00,0x7e]
+
+v_fract_f64_e32 v[0:1], lit(1)
+// SICI: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00]
+// GFX89: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x64,0x00,0x7e,0x01,0x00,0x00,0x00]
+// GFX11: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00]
+// GFX12: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00]
+// GFX1250: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xfe,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
-// SICI: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x64,0x00,0x7e]
-v_fract_f64_e32 v[0:1], 0
-
-// SICI: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0x42,0xd3,0x80,0x00,0x00,0x00]
-// GFX89: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0x5c,0xd1,0x80,0x00,0x00,0x00]
v_trunc_f32_e64 v0, 0
+// SICI: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0x42,0xd3,0x80,0x00,0x00,0x00]
+// GFX89: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0x5c,0xd1,0x80,0x00,0x00,0x00]
+// GFX12XX: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0xa1,0xd5,0x80,0x00,0x00,0x00]
+// GFX11: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0xa1,0xd5,0x80,0x00,0x00,0x00]
-// SICI: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0x7c,0xd3,0x80,0x00,0x00,0x00]
-// GFX89: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0x72,0xd1,0x80,0x00,0x00,0x00]
v_fract_f64_e64 v[0:1], 0
+// SICI: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0x7c,0xd3,0x80,0x00,0x00,0x00]
+// GFX89: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0x72,0xd1,0x80,0x00,0x00,0x00]
+// GFX12XX: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0xbe,0xd5,0x80,0x00,0x00,0x00]
+// GFX11: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0xbe,0xd5,0x80,0x00,0x00,0x00]
-// SICI: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x38,0x00,0x7e]
v_trunc_f32_e32 v0, -13
+// SICI: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x42,0x00,0x7e]
-// SICI: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x64,0x00,0x7e]
v_fract_f64_e32 v[0:1], -13
+// SICI: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x7c,0x00,0x7e]
-// SICI: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0x42,0xd3,0xcd,0x00,0x00,0x00]
-// GFX89: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0x5c,0xd1,0xcd,0x00,0x00,0x00]
v_trunc_f32_e64 v0, -13
+// SICI: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0x42,0xd3,0xcd,0x00,0x00,0x00]
+// GFX89: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0x5c,0xd1,0xcd,0x00,0x00,0x00]
+// GFX12XX: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0xa1,0xd5,0xcd,0x00,0x00,0x00]
+// GFX11: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0xa1,0xd5,0xcd,0x00,0x00,0x00]
-// SICI: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0x7c,0xd3,0xcd,0x00,0x00,0x00]
-// GFX89: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0x72,0xd1,0xcd,0x00,0x00,0x00]
v_fract_f64_e64 v[0:1], -13
+// SICI: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0x7c,0xd3,0xcd,0x00,0x00,0x00]
+// GFX89: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0x72,0xd1,0xcd,0x00,0x00,0x00]
+// GFX12XX: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0xbe,0xd5,0xcd,0x00,0x00,0x00]
+// GFX11: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0xbe,0xd5,0xcd,0x00,0x00,0x00]
-// SICI: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x38,0x00,0x7e]
v_trunc_f32_e32 v0, 35
+// SICI: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x42,0x00,0x7e]
-// SICI: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x64,0x00,0x7e]
v_fract_f64_e32 v[0:1], 35
+// SICI: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x7c,0x00,0x7e]
-// SICI: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0x42,0xd3,0xa3,0x00,0x00,0x00]
-// GFX89: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0x5c,0xd1,0xa3,0x00,0x00,0x00]
v_trunc_f32_e64 v0, 35
+// SICI: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0x42,0xd3,0xa3,0x00,0x00,0x00]
+// GFX89: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0x5c,0xd1,0xa3,0x00,0x00,0x00]
+// GFX12XX: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0xa1,0xd5,0xa3,0x00,0x00,0x00]
+// GFX11: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0xa1,0xd5,0xa3,0x00,0x00,0x00]
-// SICI: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0x7c,0xd3,0xa3,0x00,0x00,0x00]
-// GFX89: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0x72,0xd1,0xa3,0x00,0x00,0x00]
v_fract_f64_e64 v[0:1], 35
+// SICI: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0x7c,0xd3,0xa3,0x00,0x00,0x00]
+// GFX89: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0x72,0xd1,0xa3,0x00,0x00,0x00]
+// GFX12XX: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0xbe,0xd5,0xa3,0x00,0x00,0x00]
+// GFX11: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0xbe,0xd5,0xa3,0x00,0x00,0x00]
-// SICI: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00]
-// GFX89: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x38,0x00,0x7e,0xd2,0x04,0x00,0x00]
v_trunc_f32_e32 v0, 1234
+// SICI: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00]
+// GFX89: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x38,0x00,0x7e,0xd2,0x04,0x00,0x00]
+// GFX12XX: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00]
+// GFX11: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00]
-// SICI: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00]
-// GFX89: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x64,0x00,0x7e,0xd2,0x04,0x00,0x00]
v_fract_f64_e32 v[0:1], 1234
+// SICI: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00]
+// GFX89: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x64,0x00,0x7e,0xd2,0x04,0x00,0x00]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00]
+// GFX11: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported
v_trunc_f32_e64 v0, 1234
+// GFX12XX: v_trunc_f32_e64 v0, 0x4d2 ; encoding: [0x00,0x00,0xa1,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00]
+// NOSICI: :[[@LINE-2]]:21: error: literal operands are not supported
+// NOGFX89: :[[@LINE-3]]:21: error: literal operands are not supported
+// GFX11: v_trunc_f32_e64 v0, 0x4d2 ; encoding: [0x00,0x00,0xa1,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00]
+// NOSICIVI: :[[@LINE-1]]:21: error: literal operands are not supported
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported
v_fract_f64_e64 v[0:1], 1234
+// GFX12XX: v_fract_f64_e64 v[0:1], 0x4d2 ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00]
+// NOSICI: :[[@LINE-2]]:25: error: literal operands are not supported
+// NOGFX89: :[[@LINE-3]]:25: error: literal operands are not supported
+// GFX11: v_fract_f64_e64 v[0:1], 0x4d2 ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00]
+// NOSICIVI: :[[@LINE-1]]:25: error: literal operands are not supported
-// SICI: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff]
-// GFX89: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x38,0x00,0x7e,0xcf,0x2b,0xff,0xff]
v_trunc_f32_e32 v0, -54321
+// SICI: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff]
+// GFX89: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x38,0x00,0x7e,0xcf,0x2b,0xff,0xff]
+// GFX12XX: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff]
+// GFX11: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff]
-// SICI: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff]
-// GFX89: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x64,0x00,0x7e,0xcf,0x2b,0xff,0xff]
v_fract_f64_e32 v[0:1], -54321
+// SICI: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff]
+// GFX89: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x64,0x00,0x7e,0xcf,0x2b,0xff,0xff]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff]
+// GFX11: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff]
-// SICI: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde]
-// GFX89: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x38,0x00,0x7e,0xef,0xbe,0xad,0xde]
v_trunc_f32_e32 v0, 0xdeadbeef
+// SICI: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde]
+// GFX89: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x38,0x00,0x7e,0xef,0xbe,0xad,0xde]
+// GFX12XX: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde]
+// GFX11: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde]
-// SICI: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde]
-// GFX89: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x64,0x00,0x7e,0xef,0xbe,0xad,0xde]
v_fract_f64_e32 v[0:1], 0xdeadbeef
+// SICI: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde]
+// GFX89: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x64,0x00,0x7e,0xef,0xbe,0xad,0xde]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde]
+// GFX11: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde]
-// SICI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x38,0x00,0x7e]
v_trunc_f32_e32 v0, 0xffffffff
+// SICI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e]
-// SICI: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff]
-// GFX89: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xff,0xff]
v_fract_f64_e32 v[0:1], 0xffffffff
+// SICI: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff]
+// GFX89: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xff,0xff]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff]
+// GFX11: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_trunc_f32_e32 v0, 0x123456789abcdef0
+// NOGCN: :[[@LINE-1]]:21: error: invalid operand for instruction
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_fract_f64_e32 v[0:1], 0x123456789abcdef0
+// NOSICI: :[[@LINE-1]]:25: error: invalid operand for instruction
+// NOGFX89: :[[@LINE-2]]:25: error: invalid operand for instruction
+// GFX1250: v_fract_f64_e32 v[0:1], 0x123456789abcdef0 ; encoding: [0xfe,0x7c,0x00,0x7e,0xf0,0xde,0xbc,0x9a,0x78,0x56,0x34,0x12]
+// NOGFX11: :[[@LINE-4]]:25: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-5]]:25: error: invalid operand for instruction
+// NOSICIVI: :[[@LINE-1]]:25: error: invalid operand for instruction
-// SICI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x38,0x00,0x7e]
v_trunc_f32_e32 v0, 0xffffffffffffffff
+// SICI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e]
-// SICI: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x64,0x00,0x7e]
v_fract_f64_e32 v[0:1], 0xffffffffffffffff
+// SICI: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x7c,0x00,0x7e]
+
+v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1 ; encoding: [0x08,0x40,0x44,0xcc,0x00,0x09,0x06,0x1a]
+// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], lit(1)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-3]]:54: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-4]]:54: error: invalid operand for instruction
+// NOGFX1250: :[[@LINE-5]]:54: error: invalid operand for instruction
+
+v_cos_f16_e32 v5.l, 1
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+// GFX11: v_cos_f16_e32 v5.l, 1 ; encoding: [0x81,0xc2,0x0a,0x7e]
+// GFX1250: v_cos_f16_e32 v5.l, 1 ; encoding: [0x81,0xc2,0x0a,0x7e]
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_cos_f16_e32 v5.l, lit(1)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+// GFX11: v_cos_f16_e32 v5.l, lit(0x1) ; encoding: [0xff,0xc2,0x0a,0x7e,0x01,0x00,0x00,0x00]
+// GFX1250: v_cos_f16_e32 v5.l, lit(0x1) ; encoding: [0xff,0xc2,0x0a,0x7e,0x01,0x00,0x00,0x00]
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_tanh_bf16 v5, 1
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_tanh_bf16_e32 v5, 1 ; encoding: [0x81,0x94,0x0a,0x7e]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_tanh_bf16 v5, lit(1)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_tanh_bf16_e32 v5, lit(0x1) ; encoding: [0xff,0x94,0x0a,0x7e,0x01,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_trunc_f32_e32 v0, 1
+// SICI: v_trunc_f32_e32 v0, 1 ; encoding: [0x81,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, 1 ; encoding: [0x81,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 1 ; encoding: [0x81,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 1 ; encoding: [0x81,0x42,0x00,0x7e]
+
+v_trunc_f32_e32 v0, lit(1)
+// SICI: v_trunc_f32_e32 v0, lit(0x1) ; encoding: [0xff,0x42,0x00,0x7e,0x01,0x00,0x00,0x00]
+// GFX89: v_trunc_f32_e32 v0, lit(0x1) ; encoding: [0xff,0x38,0x00,0x7e,0x01,0x00,0x00,0x00]
+// GFX12XX: v_trunc_f32_e32 v0, lit(0x1) ; encoding: [0xff,0x42,0x00,0x7e,0x01,0x00,0x00,0x00]
+// GFX11: v_trunc_f32_e32 v0, lit(0x1) ; encoding: [0xff,0x42,0x00,0x7e,0x01,0x00,0x00,0x00]
+
+v_dot2_bf16_bf16 v5.l, v1, v2, 1
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, 1 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x06,0x02]
+// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_dot2_bf16_bf16 v5.l, v1, v2, lit(1)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, lit(0x1) ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0x01,0x00,0x00,0x00]
+// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_dot2_f32_f16 v5, v1, 1, v2
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_dot2_f32_f16 v5, v1, 1, v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0x03,0x09,0x1c]
+// GFX12: v_dot2_f32_f16 v5, v1, 1, v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0x03,0x09,0x1c]
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_dot2_f32_f16 v5, v1, lit(1), v2
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_dot2_f32_f16 v5, v1, lit(0x1), v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xff,0x09,0x1c,0x01,0x00,0x00,0x00]
+// GFX12: v_dot2_f32_f16 v5, v1, lit(0x1), v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xff,0x09,0x1c,0x01,0x00,0x00,0x00]
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_cvt_pk_fp8_f16 v1.l, 1
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_cvt_pk_fp8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_cvt_pk_fp8_f16 v1.l, lit(1)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_cvt_pk_fp8_f16 v1.l, lit(0x1) ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
//---------------------------------------------------------------------------//
// int literal, expected int operand
//---------------------------------------------------------------------------//
-// SICI: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x04,0x80,0xbe]
-// GFX89: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x01,0x80,0xbe]
s_mov_b64_e32 s[0:1], 0
+// GFX8PLUS: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x04,0x80,0xbe]
-// SICI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x26]
v_and_b32_e32 v0, 0, v1
+// SICI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36]
-// SICI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00]
-// GFX89: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00]
v_and_b32_e64 v0, 0, v1
+// SICI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00]
+// GFX89: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00]
+// GFX12XX: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00]
+// GFX11: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00]
-// SICI: s_mov_b64 s[0:1], -13 ; encoding: [0xcd,0x04,0x80,0xbe]
-// GFX89: s_mov_b64 s[0:1], -13 ; encoding: [0xcd,0x01,0x80,0xbe]
s_mov_b64_e32 s[0:1], -13
+// GFX8PLUS: s_mov_b64 s[0:1], -13 ; encoding: [0xcd,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], -13 ; encoding: [0xcd,0x04,0x80,0xbe]
-// SICI: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x26]
v_and_b32_e32 v0, -13, v1
+// SICI: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x36]
-// SICI: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xcd,0x02,0x02,0x00]
-// GFX89: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xcd,0x02,0x02,0x00]
v_and_b32_e64 v0, -13, v1
+// SICI: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xcd,0x02,0x02,0x00]
+// GFX89: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xcd,0x02,0x02,0x00]
+// GFX12XX: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xcd,0x02,0x02,0x00]
+// GFX11: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xcd,0x02,0x02,0x00]
-// SICI: s_mov_b64 s[0:1], 35 ; encoding: [0xa3,0x04,0x80,0xbe]
-// GFX89: s_mov_b64 s[0:1], 35 ; encoding: [0xa3,0x01,0x80,0xbe]
s_mov_b64_e32 s[0:1], 35
+// GFX8PLUS: s_mov_b64 s[0:1], 35 ; encoding: [0xa3,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], 35 ; encoding: [0xa3,0x04,0x80,0xbe]
-// SICI: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x26]
v_and_b32_e32 v0, 35, v1
+// SICI: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x36]
-// SICI: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xa3,0x02,0x02,0x00]
-// GFX89: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xa3,0x02,0x02,0x00]
v_and_b32_e64 v0, 35, v1
+// SICI: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xa3,0x02,0x02,0x00]
+// GFX89: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xa3,0x02,0x02,0x00]
+// GFX12XX: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xa3,0x02,0x02,0x00]
+// GFX11: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xa3,0x02,0x02,0x00]
-// SICI: s_mov_b64 s[0:1], 0x4d2 ; encoding: [0xff,0x04,0x80,0xbe,0xd2,0x04,0x00,0x00]
-// GFX89: s_mov_b64 s[0:1], 0x4d2 ; encoding: [0xff,0x01,0x80,0xbe,0xd2,0x04,0x00,0x00]
s_mov_b64_e32 s[0:1], 1234
+// GFX8PLUS: s_mov_b64 s[0:1], 0x4d2 ; encoding: [0xff,0x01,0x80,0xbe,0xd2,0x04,0x00,0x00]
+// SICI: s_mov_b64 s[0:1], 0x4d2 ; encoding: [0xff,0x04,0x80,0xbe,0xd2,0x04,0x00,0x00]
-// SICI: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00]
-// GFX89: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x26,0xd2,0x04,0x00,0x00]
v_and_b32_e32 v0, 1234, v1
+// SICI: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00]
+// GFX89: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x26,0xd2,0x04,0x00,0x00]
+// GFX12XX: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00]
+// GFX11: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported
v_and_b32_e64 v0, 1234, v1
+// GFX12XX: v_and_b32_e64 v0, 0x4d2, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xff,0x02,0x02,0x00,0xd2,0x04,0x00,0x00]
+// NOSICI: :[[@LINE-2]]:19: error: literal operands are not supported
+// NOGFX89: :[[@LINE-3]]:19: error: literal operands are not supported
+// GFX11: v_and_b32_e64 v0, 0x4d2, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xff,0x02,0x02,0x00,0xd2,0x04,0x00,0x00]
+// NOSICIVI: :[[@LINE-1]]:19: error: literal operands are not supported
-// SICI: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x04,0x80,0xbe,0xcf,0x2b,0xff,0xff]
-// GFX89: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff]
s_mov_b64_e32 s[0:1], -54321
+// SICI: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x04,0x80,0xbe,0xcf,0x2b,0xff,0xff]
+// GFX89: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff]
+// GFX11: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff]
+// GFX12: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff]
+// GFX1250: s_mov_b64 s[0:1], 0xffffffffffff2bcf ; encoding: [0xfe,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff,0xff,0xff,0xff,0xff]
-// SICI: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff]
-// GFX89: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x26,0xcf,0x2b,0xff,0xff]
v_and_b32_e32 v0, -54321, v1
+// SICI: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff]
+// GFX89: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x26,0xcf,0x2b,0xff,0xff]
+// GFX12XX: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff]
+// GFX11: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff]
-// SICI: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x04,0x80,0xbe,0xef,0xbe,0xad,0xde]
-// GFX89: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde]
s_mov_b64_e32 s[0:1], 0xdeadbeef
+// SICI: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x04,0x80,0xbe,0xef,0xbe,0xad,0xde]
+// GFX89: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde]
+// GFX11: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde]
+// GFX12: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde]
+// GFX1250: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xfe,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde,0x00,0x00,0x00,0x00]
-// SICI: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde]
-// GFX89: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x26,0xef,0xbe,0xad,0xde]
v_and_b32_e32 v0, 0xdeadbeef, v1
+// SICI: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde]
+// GFX89: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x26,0xef,0xbe,0xad,0xde]
+// GFX12XX: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde]
+// GFX11: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde]
-// SICI: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x04,0x80,0xbe,0xff,0xff,0xff,0xff]
-// GFX89: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff]
s_mov_b64_e32 s[0:1], 0xffffffff
+// SICI: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x04,0x80,0xbe,0xff,0xff,0xff,0xff]
+// GFX89: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff]
+// GFX11: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff]
+// GFX12: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff]
+// GFX1250: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00]
-// SICI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x26]
v_and_b32_e32 v0, 0xffffffff, v1
+// SICI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
s_mov_b64_e32 s[0:1], 0x123456789abcdef0
+// NOSICI: :[[@LINE-1]]:23: error: invalid operand for instruction
+// NOGFX89: :[[@LINE-2]]:23: error: invalid operand for instruction
+// GFX1250: s_mov_b64 s[0:1], 0x123456789abcdef0 ; encoding: [0xfe,0x01,0x80,0xbe,0xf0,0xde,0xbc,0x9a,0x78,0x56,0x34,0x12]
+// NOGFX11: :[[@LINE-4]]:23: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-5]]:23: error: invalid operand for instruction
+// NOSICIVI: :[[@LINE-1]]:23: error: invalid operand for instruction
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_and_b32_e32 v0, 0x123456789abcdef0, v1
+// NOGCN: :[[@LINE-1]]:19: error: invalid operand for instruction
-// SICI: s_mov_b64 s[0:1], -1 ; encoding: [0xc1,0x04,0x80,0xbe]
-// GFX89: s_mov_b64 s[0:1], -1 ; encoding: [0xc1,0x01,0x80,0xbe]
s_mov_b64_e32 s[0:1], 0xffffffffffffffff
+// GFX8PLUS: s_mov_b64 s[0:1], -1 ; encoding: [0xc1,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], -1 ; encoding: [0xc1,0x04,0x80,0xbe]
-// SICI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x26]
v_and_b32_e32 v0, 0xffffffffffffffff, v1
+// SICI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36]
+
+v_not_b16 v5.l, 1
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_not_b16_e32 v5.l, 1 ; encoding: [0x81,0xd2,0x0a,0x7e]
+// GFX1250: v_not_b16_e32 v5.l, 1 ; encoding: [0x81,0xd2,0x0a,0x7e]
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_not_b16 v5.l, lit(1)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_not_b16_e32 v5.l, lit(0x1) ; encoding: [0xff,0xd2,0x0a,0x7e,0x01,0x00,0x00,0x00]
+// GFX1250: v_not_b16_e32 v5.l, lit(0x1) ; encoding: [0xff,0xd2,0x0a,0x7e,0x01,0x00,0x00,0x00]
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+s_mov_b64 s[0:1], 1
+// GFX8PLUS: s_mov_b64 s[0:1], 1 ; encoding: [0x81,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], 1 ; encoding: [0x81,0x04,0x80,0xbe]
+
+s_mov_b64 s[0:1], lit(1)
+// SICI: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x04,0x80,0xbe,0x01,0x00,0x00,0x00]
+// GFX89: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00]
+// GFX11: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00]
+// GFX12: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00]
+// GFX1250: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xfe,0x01,0x80,0xbe,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+
+v_and_b32_e32 v0, 1, v1
+// SICI: v_and_b32_e32 v0, 1, v1 ; encoding: [0x81,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, 1, v1 ; encoding: [0x81,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, 1, v1 ; encoding: [0x81,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, 1, v1 ; encoding: [0x81,0x02,0x00,0x36]
+
+v_and_b32_e32 v0, lit(1), v1
+// SICI: v_and_b32_e32 v0, lit(0x1), v1 ; encoding: [0xff,0x02,0x00,0x36,0x01,0x00,0x00,0x00]
+// GFX89: v_and_b32_e32 v0, lit(0x1), v1 ; encoding: [0xff,0x02,0x00,0x26,0x01,0x00,0x00,0x00]
+// GFX12XX: v_and_b32_e32 v0, lit(0x1), v1 ; encoding: [0xff,0x02,0x00,0x36,0x01,0x00,0x00,0x00]
+// GFX11: v_and_b32_e32 v0, lit(0x1), v1 ; encoding: [0xff,0x02,0x00,0x36,0x01,0x00,0x00,0x00]
+
+v_pk_add_u16 v5, exec_lo, 1
+// GFX12XX: v_pk_add_u16 v5, exec_lo, 1 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0x02,0x01,0x18]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_pk_add_u16 v5, exec_lo, 1 ; encoding: [0x05,0x40,0x8a,0xd3,0x7e,0x02,0x01,0x18]
+// GFX11: v_pk_add_u16 v5, exec_lo, 1 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0x02,0x01,0x18]
+// NOVI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_pk_add_u16 v5, exec_lo, lit(1)
+// GFX12XX: v_pk_add_u16 v5, exec_lo, lit(0x1) ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x01,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_pk_add_u16 v5, exec_lo, lit(0x1) ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x01,0x00,0x00,0x00]
+// NOVI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX9: :[[@LINE-5]]:31: error: invalid operand (violates constant bus restrictions)
+
+v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 1
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 1 ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0x06,0x02]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], lit(1)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], lit(0x1) ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0xfe,0x03,0x01,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
//---------------------------------------------------------------------------//
// 1/(2*PI)
//---------------------------------------------------------------------------//
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_trunc_f32_e32 v0, 0x3fc45f306dc9c882
+// NOGCN: :[[@LINE-1]]:21: error: invalid operand for instruction
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// GFX89: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x64,0x00,0x7e]
v_fract_f64_e32 v[0:1], 0x3fc45f306dc9c882
+// GFX89: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e]
+// NOSICI: :[[@LINE-3]]:25: error: invalid operand for instruction
+// GFX11: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e]
+// NOSICIVI: :[[@LINE-2]]:25: error: invalid operand for instruction
-// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e]
-// GFX89: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x38,0x00,0x7e]
v_trunc_f32_e32 v0, 0x3e22f983
+// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e]
+// GFX89: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x42,0x00,0x7e]
-// SICI: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e]
-// GFX89: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x64,0x00,0x7e,0x83,0xf9,0x22,0x3e]
v_fract_f64_e32 v[0:1], 0x3e22f983
+// SICI: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e]
+// GFX89: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x64,0x00,0x7e,0x83,0xf9,0x22,0x3e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e]
+// GFX11: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_trunc_f32_e64 v0, 0x3fc45f306dc9c882
+// NOGCN: :[[@LINE-1]]:21: error: invalid operand for instruction
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// GFX89: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0x72,0xd1,0xf8,0x00,0x00,0x00]
v_fract_f64_e64 v[0:1], 0x3fc45f306dc9c882
+// GFX89: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0x72,0xd1,0xf8,0x00,0x00,0x00]
+// GFX12XX: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0xbe,0xd5,0xf8,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-3]]:25: error: invalid operand for instruction
+// GFX11: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0xbe,0xd5,0xf8,0x00,0x00,0x00]
+// NOSICIVI: :[[@LINE-2]]:25: error: invalid operand for instruction
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported
-// GFX89: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0x5c,0xd1,0xf8,0x00,0x00,0x00]
v_trunc_f32_e64 v0, 0x3e22f983
+// GFX89: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0x5c,0xd1,0xf8,0x00,0x00,0x00]
+// GFX12XX: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0xa1,0xd5,0xf8,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-3]]:21: error: literal operands are not supported
+// GFX11: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0xa1,0xd5,0xf8,0x00,0x00,0x00]
+// NOSICIVI: :[[@LINE-2]]:21: error: literal operands are not supported
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported
v_fract_f64_e64 v[0:1], 0x3e22f983
+// GFX12XX: v_fract_f64_e64 v[0:1], 0x3e22f983 ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0x83,0xf9,0x22,0x3e]
+// NOSICI: :[[@LINE-2]]:25: error: literal operands are not supported
+// NOGFX89: :[[@LINE-3]]:25: error: literal operands are not supported
+// GFX11: v_fract_f64_e64 v[0:1], 0x3e22f983 ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0x83,0xf9,0x22,0x3e]
+// NOSICIVI: :[[@LINE-1]]:25: error: literal operands are not supported
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// GFX89: s_mov_b64 s[0:1], 0.15915494309189532 ; encoding: [0xf8,0x01,0x80,0xbe]
s_mov_b64_e32 s[0:1], 0.159154943091895317852646485335
+// GFX8PLUS: s_mov_b64 s[0:1], 0.15915494309189532 ; encoding: [0xf8,0x01,0x80,0xbe]
+// NOSICI: :[[@LINE-2]]:23: error: invalid operand for instruction
+// NOSICIVI: :[[@LINE-2]]:23: error: invalid operand for instruction
-// SICI: v_and_b32_e32 v0, 0x3e22f983, v1 ; encoding: [0xff,0x02,0x00,0x36,0x83,0xf9,0x22,0x3e]
-// GFX89: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x26]
v_and_b32_e32 v0, 0.159154943091895317852646485335, v1
+// SICI: v_and_b32_e32 v0, 0x3e22f983, v1 ; encoding: [0xff,0x02,0x00,0x36,0x83,0xf9,0x22,0x3e]
+// GFX89: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x36]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported
-// GFX89: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf8,0x02,0x02,0x00]
v_and_b32_e64 v0, 0.159154943091895317852646485335, v1
+// GFX89: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf8,0x02,0x02,0x00]
+// GFX12XX: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf8,0x02,0x02,0x00]
+// NOSICI: :[[@LINE-3]]:19: error: literal operands are not supported
+// GFX11: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf8,0x02,0x02,0x00]
+// NOSICIVI: :[[@LINE-2]]:19: error: literal operands are not supported
-// SICI: v_fract_f64_e32 v[0:1], 0x3fc45f30 ; encoding: [0xff,0x7c,0x00,0x7e,0x30,0x5f,0xc4,0x3f]
-// GFX89: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x64,0x00,0x7e]
v_fract_f64 v[0:1], 0.159154943091895317852646485335
+// SICI: v_fract_f64_e32 v[0:1], 0x3fc45f30 ; encoding: [0xff,0x7c,0x00,0x7e,0x30,0x5f,0xc4,0x3f]
+// GFX89: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e]
+// NOSICI: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// GFX11: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e]
+// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
-// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e]
-// GFX89: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x38,0x00,0x7e]
v_trunc_f32 v0, 0.159154943091895317852646485335
+// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e]
+// GFX89: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x42,0x00,0x7e]
+
+v_trunc_f32 v0, lit(0.159154943091895317852646485335)
+// SICI: v_trunc_f32_e32 v0, lit(0x3e22f983) ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e]
+// GFX89: v_trunc_f32_e32 v0, lit(0x3e22f983) ; encoding: [0xff,0x38,0x00,0x7e,0x83,0xf9,0x22,0x3e]
+// GFX12XX: v_trunc_f32_e32 v0, lit(0x3e22f983) ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e]
+// GFX11: v_trunc_f32_e32 v0, lit(0x3e22f983) ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e]
//---------------------------------------------------------------------------//
// integer literal truncation checks
//---------------------------------------------------------------------------//
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
s_mov_b32 s0, 0x101ffffffff
+// NOGCN: :[[@LINE-1]]:15: error: invalid operand for instruction
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
s_mov_b32 s0, 0x1000000001
+// NOGCN: :[[@LINE-1]]:15: error: invalid operand for instruction
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
s_mov_b32 s0, 0x1000000fff
+// NOGCN: :[[@LINE-1]]:15: error: invalid operand for instruction
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_trunc_f32 v0, 0x1fffffffff0
+// NOGCN: :[[@LINE-1]]:17: error: invalid operand for instruction
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_trunc_f32 v0, 0x100000001
+// NOGCN: :[[@LINE-1]]:17: error: invalid operand for instruction
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_trunc_f32 v0, 0x1fffffff000
+// NOGCN: :[[@LINE-1]]:17: error: invalid operand for instruction
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
s_mov_b64 s[0:1], 0x101ffffffff
+// NOSICI: :[[@LINE-1]]:19: error: invalid operand for instruction
+// NOGFX89: :[[@LINE-2]]:19: error: invalid operand for instruction
+// GFX1250: s_mov_b64 s[0:1], 0x101ffffffff ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0xff,0xff,0xff,0x01,0x01,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:19: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-5]]:19: error: invalid operand for instruction
+// NOSICIVI: :[[@LINE-1]]:19: error: invalid operand for instruction
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
s_mov_b64 s[0:1], 0x1000000001
+// NOSICI: :[[@LINE-1]]:19: error: invalid operand for instruction
+// NOGFX89: :[[@LINE-2]]:19: error: invalid operand for instruction
+// GFX1250: s_mov_b64 s[0:1], 0x1000000001 ; encoding: [0xfe,0x01,0x80,0xbe,0x01,0x00,0x00,0x00,0x10,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:19: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-5]]:19: error: invalid operand for instruction
+// NOSICIVI: :[[@LINE-1]]:19: error: invalid operand for instruction
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
s_mov_b64 s[0:1], 0x1000000fff
+// NOSICI: :[[@LINE-1]]:19: error: invalid operand for instruction
+// NOGFX89: :[[@LINE-2]]:19: error: invalid operand for instruction
+// GFX1250: s_mov_b64 s[0:1], 0x1000000fff ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0x0f,0x00,0x00,0x10,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:19: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-5]]:19: error: invalid operand for instruction
+// NOSICIVI: :[[@LINE-1]]:19: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOSI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOCIVI: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_trunc_f64 v[0:1], 0x1fffffffff0
+// NOGFX89: :[[@LINE-1]]:21: error: invalid operand for instruction
+// GFX1250: v_trunc_f64_e32 v[0:1], 0x1fffffffff0 ; encoding: [0xfe,0x2e,0x00,0x7e,0xf0,0xff,0xff,0xff,0xff,0x01,0x00,0x00]
+// NOSI: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOCI: :[[@LINE-4]]:21: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-5]]:21: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-6]]:21: error: invalid operand for instruction
+// NOCIVI: :[[@LINE-4]]:21: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOSI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOCIVI: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_trunc_f64 v[0:1], 0x100000001
+// NOGFX89: :[[@LINE-1]]:21: error: invalid operand for instruction
+// GFX1250: v_trunc_f64_e32 v[0:1], 0x100000001 ; encoding: [0xfe,0x2e,0x00,0x7e,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
+// NOSI: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOCI: :[[@LINE-4]]:21: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-5]]:21: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-6]]:21: error: invalid operand for instruction
+// NOCIVI: :[[@LINE-4]]:21: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOSI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOCIVI: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_trunc_f64 v[0:1], 0x1fffffff000
+// NOGFX89: :[[@LINE-1]]:21: error: invalid operand for instruction
+// GFX1250: v_trunc_f64_e32 v[0:1], 0x1fffffff000 ; encoding: [0xfe,0x2e,0x00,0x7e,0x00,0xf0,0xff,0xff,0xff,0x01,0x00,0x00]
+// NOSI: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOCI: :[[@LINE-4]]:21: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-5]]:21: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-6]]:21: error: invalid operand for instruction
+// NOCIVI: :[[@LINE-4]]:21: error: invalid operand for instruction
//---------------------------------------------------------------------------//
// named inline values: scc, vccz, execz
//---------------------------------------------------------------------------//
+buffer_atomic_add v0, off, s[0:3], scc offset:4095
// SICI: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0xc8,0xe0,0x00,0x00,0x00,0xfd]
// GFX89: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xfd]
-buffer_atomic_add v0, off, s[0:3], scc offset:4095
+// GFX12XX: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0x7d,0x40,0x0d,0xc4,0x00,0x00,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX11: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0xd4,0xe0,0x00,0x00,0x00,0xfd]
-// SICI: s_add_i32 s0, src_vccz, s0 ; encoding: [0xfb,0x00,0x00,0x81]
-// GFX89: s_add_i32 s0, src_vccz, s0 ; encoding: [0xfb,0x00,0x00,0x81]
s_add_i32 s0, vccz, s0
+// SICI: s_add_i32 s0, src_vccz, s0 ; encoding: [0xfb,0x00,0x00,0x81]
+// GFX89: s_add_i32 s0, src_vccz, s0 ; encoding: [0xfb,0x00,0x00,0x81]
+// NOGFX11: :[[@LINE-3]]:15: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:15: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:15: error: src_vccz register not available on this GPU
-// SICI: s_add_i32 s0, src_execz, s0 ; encoding: [0xfc,0x00,0x00,0x81]
-// GFX89: s_add_i32 s0, src_execz, s0 ; encoding: [0xfc,0x00,0x00,0x81]
s_add_i32 s0, execz, s0
+// SICI: s_add_i32 s0, src_execz, s0 ; encoding: [0xfc,0x00,0x00,0x81]
+// GFX89: s_add_i32 s0, src_execz, s0 ; encoding: [0xfc,0x00,0x00,0x81]
+// NOGFX11: :[[@LINE-3]]:15: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:15: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:15: error: src_execz register not available on this GPU
-// SICI: s_add_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81]
-// GFX89: s_add_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81]
s_add_i32 s0, scc, s0
+// SICI: s_add_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81]
+// GFX89: s_add_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81]
+// GFX12XX: s_add_co_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81]
+// GFX11: s_add_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81]
-// SICI: s_and_b64 s[0:1], s[0:1], src_vccz ; encoding: [0x00,0xfb,0x80,0x87]
-// GFX89: s_and_b64 s[0:1], s[0:1], src_vccz ; encoding: [0x00,0xfb,0x80,0x86]
s_and_b64 s[0:1], s[0:1], src_vccz
+// SICI: s_and_b64 s[0:1], s[0:1], src_vccz ; encoding: [0x00,0xfb,0x80,0x87]
+// GFX89: s_and_b64 s[0:1], s[0:1], src_vccz ; encoding: [0x00,0xfb,0x80,0x86]
+// NOGFX11: :[[@LINE-3]]:27: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:27: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:27: error: src_vccz register not available on this GPU
-// SICI: s_and_b64 s[0:1], s[0:1], src_execz ; encoding: [0x00,0xfc,0x80,0x87]
-// GFX89: s_and_b64 s[0:1], s[0:1], src_execz ; encoding: [0x00,0xfc,0x80,0x86]
s_and_b64 s[0:1], s[0:1], src_execz
+// SICI: s_and_b64 s[0:1], s[0:1], src_execz ; encoding: [0x00,0xfc,0x80,0x87]
+// GFX89: s_and_b64 s[0:1], s[0:1], src_execz ; encoding: [0x00,0xfc,0x80,0x86]
+// NOGFX11: :[[@LINE-3]]:27: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:27: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:27: error: src_execz register not available on this GPU
-// SICI: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x87]
-// GFX89: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x86]
s_and_b64 s[0:1], s[0:1], src_scc
+// SICI: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x87]
+// GFX89: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x86]
+// GFX12XX: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x8b]
+// GFX11: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x8b]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// GFX89: v_add_u16_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x4c]
v_add_u16 v0, vccz, v0
+// GFX89: v_add_u16_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x4c]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
-// NOSICI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// GFX9: v_add_u16_sdwa v0, src_scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xfd,0x06,0x86,0x06]
v_add_u16_sdwa v0, scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u16_sdwa v0, src_scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xfd,0x06,0x86,0x06]
+// NOVI: :[[@LINE-3]]:20: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOSICI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// GFX9: v_add_u16_sdwa v0, v0, src_scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xfa,0x01,0x4c,0x00,0x06,0x06,0x86]
v_add_u16_sdwa v0, v0, scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u16_sdwa v0, v0, src_scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xfa,0x01,0x4c,0x00,0x06,0x06,0x86]
+// NOVI: :[[@LINE-3]]:24: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX9: v_add_u32_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x68]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
v_add_u32 v0, execz, v0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u32_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x68]
+// NOVI: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode
+// NOGFX11: :[[@LINE-4]]:15: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-5]]:15: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-6]]:15: error: src_execz register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX9: v_add_u32_e64 v0, src_scc, v0 ; encoding: [0x00,0x00,0x34,0xd1,0xfd,0x00,0x02,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
v_add_u32_e64 v0, scc, v0
+// GFX12XX: v_add_nc_u32_e64 v0, src_scc, v0 ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x02,0x00]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u32_e64 v0, src_scc, v0 ; encoding: [0x00,0x00,0x34,0xd1,0xfd,0x00,0x02,0x00]
+// GFX11: v_add_nc_u32_e64 v0, src_scc, v0 ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x02,0x00]
+// NOVI: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// SICI: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0x44,0x7d]
-// GFX89: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0xc4,0x7d]
v_cmp_eq_i64 vcc, scc, v[0:1]
+// SICI: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0x44,0x7d]
+// GFX89: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0xc4,0x7d]
+// NOGFX11: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode
+// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// GFX89: v_max_f16_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x5a]
v_max_f16 v0, execz, v0
+// GFX89: v_max_f16_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x5a]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-3]]:15: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:15: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:15: error: src_execz register not available on this GPU
+// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
-// SICI: v_max_f32_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x20]
-// GFX89: v_max_f32_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x16]
v_max_f32 v0, vccz, v0
+// SICI: v_max_f32_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x20]
+// GFX89: v_max_f32_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x16]
+// NOGFX11: :[[@LINE-3]]:15: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:15: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:15: error: src_vccz register not available on this GPU
-// SICI: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0xce,0xd2,0xfd,0x00,0x02,0x00]
-// GFX89: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xfd,0x00,0x02,0x00]
v_max_f64 v[0:1], scc, v[0:1]
+// SICI: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0xce,0xd2,0xfd,0x00,0x02,0x00]
+// GFX89: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xfd,0x00,0x02,0x00]
+// GFX12XX: v_max_num_f64_e32 v[0:1], src_scc, v[0:1] ; encoding: [0xfd,0x00,0x00,0x1c]
+// GFX11: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0x2a,0xd7,0xfd,0x00,0x02,0x00]
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// GFX9: v_pk_add_f16 v0, src_execz, v0 ; encoding: [0x00,0x40,0x8f,0xd3,0xfc,0x00,0x02,0x18]
v_pk_add_f16 v0, execz, v0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_pk_add_f16 v0, src_execz, v0 ; encoding: [0x00,0x40,0x8f,0xd3,0xfc,0x00,0x02,0x18]
+// NOVI: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-4]]:18: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-5]]:18: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-6]]:18: error: src_execz register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// GFX89: v_ceil_f16_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x85,0xd1,0xfb,0x00,0x00,0x20]
v_ceil_f16 v0, neg(vccz)
+// GFX89: v_ceil_f16_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x85,0xd1,0xfb,0x00,0x00,0x20]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-3]]:20: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:20: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:20: error: src_vccz register not available on this GPU
+// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// GFX89: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0x85,0xd1,0xfd,0x00,0x00,0x00]
v_ceil_f16 v0, abs(scc)
+// GFX89: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0x85,0xd1,0xfd,0x00,0x00,0x00]
+// GFX12XX: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// GFX11: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
+// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
-// NOSI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// CI: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x30,0xd3,0xfc,0x00,0x00,0x00]
-// GFX89: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x58,0xd1,0xfc,0x00,0x00,0x00]
v_ceil_f64 v[5:6], |execz|
+// GFX89: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x58,0xd1,0xfc,0x00,0x00,0x00]
+// CI: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x30,0xd3,0xfc,0x00,0x00,0x00]
+// NOSI: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-4]]:21: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-5]]:21: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-6]]:21: error: src_execz register not available on this GPU
-// NOSI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// CI: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x30,0xd3,0x6a,0x00,0x00,0x20]
-// GFX89: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x58,0xd1,0x6a,0x00,0x00,0x20]
v_ceil_f64 v[5:6], -vcc
+// GFX89: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x58,0xd1,0x6a,0x00,0x00,0x20]
+// CI: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x30,0xd3,0x6a,0x00,0x00,0x20]
+// GFX11: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x20]
+// GFX12: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x20]
+// NOSI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-6]]:12: error: invalid operand for instruction
-// SICI: v_ceil_f32_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x44,0xd3,0xfb,0x00,0x00,0x20]
-// GFX89: v_ceil_f32_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x5d,0xd1,0xfb,0x00,0x00,0x20]
v_ceil_f32 v0, -vccz
+// SICI: v_ceil_f32_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x44,0xd3,0xfb,0x00,0x00,0x20]
+// GFX89: v_ceil_f32_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x5d,0xd1,0xfb,0x00,0x00,0x20]
+// NOGFX11: :[[@LINE-3]]:17: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:17: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:17: error: src_vccz register not available on this GPU
-// SICI: v_ceil_f32_e64 v0, |src_execz| ; encoding: [0x00,0x01,0x44,0xd3,0xfc,0x00,0x00,0x00]
-// GFX89: v_ceil_f32_e64 v0, |src_execz| ; encoding: [0x00,0x01,0x5d,0xd1,0xfc,0x00,0x00,0x00]
v_ceil_f32 v0, |execz|
+// SICI: v_ceil_f32_e64 v0, |src_execz| ; encoding: [0x00,0x01,0x44,0xd3,0xfc,0x00,0x00,0x00]
+// GFX89: v_ceil_f32_e64 v0, |src_execz| ; encoding: [0x00,0x01,0x5d,0xd1,0xfc,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-3]]:17: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:17: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:17: error: src_execz register not available on this GPU
-// NOSICI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// GFX9: v_ceil_f16_sdwa v5, |src_vccz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfb,0x16,0xa6,0x00]
v_ceil_f16_sdwa v5, |vccz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_ceil_f16_sdwa v5, |src_vccz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfb,0x16,0xa6,0x00]
+// NOVI: :[[@LINE-3]]:22: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOSICI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// GFX9: v_ceil_f16_sdwa v5, -src_scc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfd,0x16,0x96,0x00]
v_ceil_f16_sdwa v5, -scc dst_sel:DWORD dst_unused:UNUSED_PRESERVE
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_ceil_f16_sdwa v5, -src_scc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfd,0x16,0x96,0x00]
+// NOVI: :[[@LINE-3]]:22: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX9: v_ceil_f32_sdwa v5, src_vccz dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfb,0x16,0x86,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_ceil_f32_sdwa v5, vccz dst_sel:DWORD src0_sel:DWORD
+// NOSICI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
+// GFX9: v_ceil_f32_sdwa v5, src_vccz dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfb,0x16,0x86,0x00]
+// NOVI: :[[@LINE-3]]:21: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOSICIVI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
-// GFX9: v_ceil_f32_sdwa v5, |src_execz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfc,0x16,0xa6,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
v_ceil_f32_sdwa v5, |execz| dst_sel:DWORD src0_sel:DWORD
+// NOSICI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
+// GFX9: v_ceil_f32_sdwa v5, |src_execz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfc,0x16,0xa6,0x00]
+// NOVI: :[[@LINE-3]]:22: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOSICIVI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
//---------------------------------------------------------------------------//
// named inline values: shared_base, shared_limit, private_base, etc
//---------------------------------------------------------------------------//
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// GFX9: buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xeb]
buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095
+// NOSICI: :[[@LINE-1]]:36: error: src_shared_base register not available on this GPU
+// GFX9: buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xeb]
+// GFX11: buffer_atomic_add_u32 v0, off, s[0:3], src_shared_base offset:4095 ; encoding: [0xff,0x0f,0xd4,0xe0,0x00,0x00,0x00,0xeb]
+// NOVI: :[[@LINE-4]]:36: error: src_shared_base register not available on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-6]]:1: error: operands are not valid for this GPU or mode
+// NOSICIVI: :[[@LINE-1]]:36: error: src_shared_base register not available on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// GFX9: s_add_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81]
s_add_i32 s0, src_shared_base, s0
+// GFX12XX: s_add_co_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81]
+// NOSICI: :[[@LINE-2]]:15: error: src_shared_base register not available on this GPU
+// GFX9: s_add_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81]
+// GFX11: s_add_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81]
+// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:15: error: src_shared_base register not available on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_limit register not available on this GPU
-// GFX9: s_add_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81]
s_add_i32 s0, src_shared_limit, s0
+// GFX12XX: s_add_co_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81]
+// NOSICI: :[[@LINE-2]]:15: error: src_shared_limit register not available on this GPU
+// GFX9: s_add_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81]
+// GFX11: s_add_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81]
+// NOVI: :[[@LINE-5]]:15: error: src_shared_limit register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:15: error: src_shared_limit register not available on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_base register not available on this GPU
-// GFX9: s_add_i32 s0, src_private_base, s0 ; encoding: [0xed,0x00,0x00,0x81]
s_add_i32 s0, src_private_base, s0
+// GFX12XX: s_add_co_i32 s0, src_private_base, s0 ; encoding: [0xed,0x00,0x00,0x81]
+// NOSICI: :[[@LINE-2]]:15: error: src_private_base register not available on this GPU
+// GFX9: s_add_i32 s0, src_private_base, s0 ; encoding: [0xed,0x00,0x00,0x81]
+// GFX11: s_add_i32 s0, src_private_base, s0 ; encoding: [0xed,0x00,0x00,0x81]
+// NOVI: :[[@LINE-5]]:15: error: src_private_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:15: error: src_private_base register not available on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_limit register not available on this GPU
-// GFX9: s_add_i32 s0, src_private_limit, s0 ; encoding: [0xee,0x00,0x00,0x81]
s_add_i32 s0, src_private_limit, s0
+// GFX12XX: s_add_co_i32 s0, src_private_limit, s0 ; encoding: [0xee,0x00,0x00,0x81]
+// NOSICI: :[[@LINE-2]]:15: error: src_private_limit register not available on this GPU
+// GFX9: s_add_i32 s0, src_private_limit, s0 ; encoding: [0xee,0x00,0x00,0x81]
+// GFX11: s_add_i32 s0, src_private_limit, s0 ; encoding: [0xee,0x00,0x00,0x81]
+// NOVI: :[[@LINE-5]]:15: error: src_private_limit register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:15: error: src_private_limit register not available on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_pops_exiting_wave_id register not available on this GPU
-// GFX9: s_add_i32 s0, src_pops_exiting_wave_id, s0 ; encoding: [0xef,0x00,0x00,0x81]
s_add_i32 s0, src_pops_exiting_wave_id, s0
+// NOSICI: :[[@LINE-1]]:15: error: src_pops_exiting_wave_id register not available on this GPU
+// GFX9: s_add_i32 s0, src_pops_exiting_wave_id, s0 ; encoding: [0xef,0x00,0x00,0x81]
+// NOVI: :[[@LINE-3]]:15: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:15: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX12: :[[@LINE-5]]:15: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX1250: :[[@LINE-6]]:15: error: src_pops_exiting_wave_id register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:15: error: src_pops_exiting_wave_id register not available on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// GFX9: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x86]
s_and_b64 s[0:1], s[0:1], src_shared_base
+// GFX12XX: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x8b]
+// NOSICI: :[[@LINE-2]]:27: error: src_shared_base register not available on this GPU
+// GFX9: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x86]
+// GFX11: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x8b]
+// NOVI: :[[@LINE-5]]:27: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:27: error: src_shared_base register not available on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_limit register not available on this GPU
-// GFX9: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x86]
s_and_b64 s[0:1], s[0:1], src_shared_limit
+// GFX12XX: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x8b]
+// NOSICI: :[[@LINE-2]]:27: error: src_shared_limit register not available on this GPU
+// GFX9: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x86]
+// GFX11: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x8b]
+// NOVI: :[[@LINE-5]]:27: error: src_shared_limit register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:27: error: src_shared_limit register not available on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_base register not available on this GPU
-// GFX9: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x86]
s_and_b64 s[0:1], s[0:1], src_private_base
+// GFX12XX: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x8b]
+// NOSICI: :[[@LINE-2]]:27: error: src_private_base register not available on this GPU
+// GFX9: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x86]
+// GFX11: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x8b]
+// NOVI: :[[@LINE-5]]:27: error: src_private_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:27: error: src_private_base register not available on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_limit register not available on this GPU
-// GFX9: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x86]
s_and_b64 s[0:1], s[0:1], src_private_limit
+// GFX12XX: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x8b]
+// NOSICI: :[[@LINE-2]]:27: error: src_private_limit register not available on this GPU
+// GFX9: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x86]
+// GFX11: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x8b]
+// NOVI: :[[@LINE-5]]:27: error: src_private_limit register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:27: error: src_private_limit register not available on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_pops_exiting_wave_id register not available on this GPU
-// GFX9: s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id ; encoding: [0x00,0xef,0x80,0x86]
s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id
+// NOSICI: :[[@LINE-1]]:27: error: src_pops_exiting_wave_id register not available on this GPU
+// GFX9: s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id ; encoding: [0x00,0xef,0x80,0x86]
+// NOVI: :[[@LINE-3]]:27: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:27: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX12: :[[@LINE-5]]:27: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX1250: :[[@LINE-6]]:27: error: src_pops_exiting_wave_id register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:27: error: src_pops_exiting_wave_id register not available on this GPU
-// GFX9: v_add_u16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4c]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
v_add_u16 v0, src_shared_base, v0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4c]
+// NOVI: :[[@LINE-3]]:15: error: src_shared_base register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX9: v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xeb,0x06,0x86,0x06]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xeb,0x06,0x86,0x06]
+// NOVI: :[[@LINE-3]]:20: error: src_shared_base register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX9: v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xd6,0x01,0x4c,0x00,0x06,0x06,0x86]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xd6,0x01,0x4c,0x00,0x06,0x06,0x86]
+// NOVI: :[[@LINE-3]]:24: error: src_shared_base register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX9: v_add_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x68]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
v_add_u32 v0, src_shared_base, v0
+// GFX12XX: v_add_nc_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4a]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x68]
+// GFX11: v_add_nc_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4a]
+// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX9: v_add_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x34,0xd1,0xeb,0x00,0x02,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
v_add_u32_e64 v0, src_shared_base, v0
+// GFX12XX: v_add_nc_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x25,0xd5,0xeb,0x00,0x02,0x00]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x34,0xd1,0xeb,0x00,0x02,0x00]
+// GFX11: v_add_nc_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x25,0xd5,0xeb,0x00,0x02,0x00]
+// NOVI: :[[@LINE-5]]:19: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// GFX9: v_cmp_eq_i64_e32 vcc, src_shared_base, v[0:1] ; encoding: [0xeb,0x00,0xc4,0x7d]
v_cmp_eq_i64 vcc, src_shared_base, v[0:1]
+// NOSICI: :[[@LINE-1]]:19: error: src_shared_base register not available on this GPU
+// GFX9: v_cmp_eq_i64_e32 vcc, src_shared_base, v[0:1] ; encoding: [0xeb,0x00,0xc4,0x7d]
+// NOVI: :[[@LINE-3]]:19: error: src_shared_base register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-6]]:1: error: operands are not valid for this GPU or mode
+// NOSICIVI: :[[@LINE-1]]:19: error: src_shared_base register not available on this GPU
-// GFX9: v_max_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x5a]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
v_max_f16 v0, src_shared_base, v0
+// GFX12XX: v_max_num_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x62]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_max_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x5a]
+// GFX11: v_max_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x72]
+// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// GFX9: v_max_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x16]
v_max_f32 v0, src_shared_base, v0
+// GFX12XX: v_max_num_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x2c]
+// NOSICI: :[[@LINE-2]]:15: error: src_shared_base register not available on this GPU
+// GFX9: v_max_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x16]
+// GFX11: v_max_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x20]
+// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:15: error: src_shared_base register not available on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// GFX9: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xeb,0x00,0x02,0x00]
v_max_f64 v[0:1], src_shared_base, v[0:1]
+// GFX12XX: v_max_num_f64_e32 v[0:1], src_shared_base, v[0:1] ; encoding: [0xeb,0x00,0x00,0x1c]
+// NOSICI: :[[@LINE-2]]:19: error: src_shared_base register not available on this GPU
+// GFX9: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xeb,0x00,0x02,0x00]
+// GFX11: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: [0x00,0x00,0x2a,0xd7,0xeb,0x00,0x02,0x00]
+// NOVI: :[[@LINE-5]]:19: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:19: error: src_shared_base register not available on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// GFX9: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x40,0x8f,0xd3,0xeb,0x00,0x02,0x18]
v_pk_add_f16 v0, src_shared_base, v0
+// GFX12XX: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x40,0x0f,0xcc,0xeb,0x00,0x02,0x18]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x40,0x8f,0xd3,0xeb,0x00,0x02,0x18]
+// GFX11: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x40,0x0f,0xcc,0xeb,0x00,0x02,0x18]
+// NOVI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX9: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x85,0xd1,0xeb,0x00,0x00,0x20]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
v_ceil_f16 v0, neg(src_shared_base)
+// GFX12XX: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x85,0xd1,0xeb,0x00,0x00,0x20]
+// GFX11: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
+// NOVI: :[[@LINE-5]]:20: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX9: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x85,0xd1,0xeb,0x00,0x00,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
v_ceil_f16 v0, abs(src_shared_base)
+// GFX12XX: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x85,0xd1,0xeb,0x00,0x00,0x00]
+// GFX11: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
+// NOVI: :[[@LINE-5]]:20: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX9: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x58,0xd1,0xeb,0x00,0x00,0x00]
-// NOSI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOCIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
v_ceil_f64 v[5:6], |src_shared_base|
+// GFX9: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x58,0xd1,0xeb,0x00,0x00,0x00]
+// GFX11: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x98,0xd5,0xeb,0x00,0x00,0x00]
+// GFX12: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x98,0xd5,0xeb,0x00,0x00,0x00]
+// NOSI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOCI: :[[@LINE-5]]:21: error: src_shared_base register not available on this GPU
+// NOVI: :[[@LINE-6]]:21: error: src_shared_base register not available on this GPU
+// NOGFX1250: :[[@LINE-7]]:12: error: invalid operand for instruction
+// NOCIVI: :[[@LINE-5]]:21: error: src_shared_base register not available on this GPU
-// GFX9: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x58,0xd1,0xeb,0x00,0x00,0x20]
-// NOSI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOCIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
v_ceil_f64 v[5:6], -src_shared_base
+// GFX9: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x58,0xd1,0xeb,0x00,0x00,0x20]
+// GFX11: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x98,0xd5,0xeb,0x00,0x00,0x20]
+// GFX12: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x98,0xd5,0xeb,0x00,0x00,0x20]
+// NOSI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOCI: :[[@LINE-5]]:21: error: src_shared_base register not available on this GPU
+// NOVI: :[[@LINE-6]]:21: error: src_shared_base register not available on this GPU
+// NOGFX1250: :[[@LINE-7]]:12: error: invalid operand for instruction
+// NOCIVI: :[[@LINE-5]]:21: error: src_shared_base register not available on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// GFX9: v_ceil_f32_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x5d,0xd1,0xeb,0x00,0x00,0x20]
v_ceil_f32 v0, -src_shared_base
+// GFX12XX: v_ceil_f32_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xa2,0xd5,0xeb,0x00,0x00,0x20]
+// NOSICI: :[[@LINE-2]]:17: error: src_shared_base register not available on this GPU
+// GFX9: v_ceil_f32_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x5d,0xd1,0xeb,0x00,0x00,0x20]
+// GFX11: v_ceil_f32_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xa2,0xd5,0xeb,0x00,0x00,0x20]
+// NOVI: :[[@LINE-5]]:17: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:17: error: src_shared_base register not available on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// GFX9: v_ceil_f32_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x5d,0xd1,0xeb,0x00,0x00,0x00]
v_ceil_f32 v0, |src_shared_base|
+// GFX12XX: v_ceil_f32_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xa2,0xd5,0xeb,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-2]]:17: error: src_shared_base register not available on this GPU
+// GFX9: v_ceil_f32_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x5d,0xd1,0xeb,0x00,0x00,0x00]
+// GFX11: v_ceil_f32_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xa2,0xd5,0xeb,0x00,0x00,0x00]
+// NOVI: :[[@LINE-5]]:17: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:17: error: src_shared_base register not available on this GPU
-// GFX9: v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0xa6,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0xa6,0x00]
+// NOVI: :[[@LINE-3]]:22: error: src_shared_base register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX9: v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0x96,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0x96,0x00]
+// NOVI: :[[@LINE-3]]:22: error: src_shared_base register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// GFX9: v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0x86,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD src0_sel:DWORD
+// NOSICI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
+// GFX9: v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0x86,0x00]
+// NOVI: :[[@LINE-3]]:21: error: src_shared_base register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOSICIVI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
-// GFX9: v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0xa6,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD src0_sel:DWORD
+// NOSICI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
+// GFX9: v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0xa6,0x00]
+// NOVI: :[[@LINE-3]]:22: error: src_shared_base register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOSICIVI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
//---------------------------------------------------------------------------//
// named inline values compete with other scalars for constant bus access
//---------------------------------------------------------------------------//
-// NOGFX9: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_private_base register not available on this GPU
v_add_u32 v0, private_base, s0
+// GFX12XX: v_add_nc_u32_e64 v0, src_private_base, s0 ; encoding: [0x00,0x00,0x25,0xd5,0xed,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_add_nc_u32_e64 v0, src_private_base, s0 ; encoding: [0x00,0x00,0x25,0xd5,0xed,0x00,0x00,0x00]
+// NOVI: :[[@LINE-4]]:15: error: src_private_base register not available on this GPU
+// NOGFX9: :[[@LINE-5]]:29: error: invalid operand (violates constant bus restrictions)
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOGFX9: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
v_add_u32 v0, scc, s0
+// GFX12XX: v_add_nc_u32_e64 v0, src_scc, s0 ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_add_nc_u32_e64 v0, src_scc, s0 ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x00,0x00]
+// NOVI: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX9: :[[@LINE-5]]:20: error: invalid operand (violates constant bus restrictions)
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
// v_div_fmas implicitly reads VCC
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
v_div_fmas_f32 v0, shared_base, v0, v1
+// GFX12XX: v_div_fmas_f32 v0, src_shared_base, v0, v1 ; encoding: [0x00,0x00,0x37,0xd6,0xeb,0x00,0x06,0x04]
+// NOSICI: :[[@LINE-2]]:20: error: src_shared_base register not available on this GPU
+// GFX11: v_div_fmas_f32 v0, src_shared_base, v0, v1 ; encoding: [0x00,0x00,0x37,0xd6,0xeb,0x00,0x06,0x04]
+// NOVI: :[[@LINE-4]]:20: error: src_shared_base register not available on this GPU
+// NOGFX9: :[[@LINE-5]]:20: error: invalid operand (violates constant bus restrictions)
+// NOSICIVI: :[[@LINE-1]]:20: error: src_shared_base register not available on this GPU
// v_div_fmas implicitly reads VCC
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_limit register not available on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
v_div_fmas_f32 v0, v0, shared_limit, v1
+// GFX12XX: v_div_fmas_f32 v0, v0, src_shared_limit, v1 ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xd9,0x05,0x04]
+// NOSICI: :[[@LINE-2]]:24: error: src_shared_limit register not available on this GPU
+// GFX11: v_div_fmas_f32 v0, v0, src_shared_limit, v1 ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xd9,0x05,0x04]
+// NOVI: :[[@LINE-4]]:24: error: src_shared_limit register not available on this GPU
+// NOGFX9: :[[@LINE-5]]:24: error: invalid operand (violates constant bus restrictions)
+// NOSICIVI: :[[@LINE-1]]:24: error: src_shared_limit register not available on this GPU
// v_div_fmas implicitly reads VCC
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_limit register not available on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
v_div_fmas_f32 v0, v0, v1, private_limit
+// GFX12XX: v_div_fmas_f32 v0, v0, v1, src_private_limit ; encoding: [0x00,0x00,0x37,0xd6,0x00,0x03,0xba,0x03]
+// NOSICI: :[[@LINE-2]]:28: error: src_private_limit register not available on this GPU
+// GFX11: v_div_fmas_f32 v0, v0, v1, src_private_limit ; encoding: [0x00,0x00,0x37,0xd6,0x00,0x03,0xba,0x03]
+// NOVI: :[[@LINE-4]]:28: error: src_private_limit register not available on this GPU
+// NOGFX9: :[[@LINE-5]]:28: error: invalid operand (violates constant bus restrictions)
+// NOSICIVI: :[[@LINE-1]]:28: error: src_private_limit register not available on this GPU
// v_div_fmas implicitly reads VCC
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
v_div_fmas_f32 v0, execz, v0, v1
+// NOSICI: :[[@LINE-1]]:20: error: invalid operand (violates constant bus restrictions)
+// NOGFX89: :[[@LINE-2]]:20: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-3]]:20: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:20: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:20: error: src_execz register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:20: error: invalid operand (violates constant bus restrictions)
// v_div_fmas implicitly reads VCC
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
v_div_fmas_f32 v0, v0, scc, v1
+// GFX12XX: v_div_fmas_f32 v0, v0, src_scc, v1 ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xfb,0x05,0x04]
+// NOSICI: :[[@LINE-2]]:24: error: invalid operand (violates constant bus restrictions)
+// NOGFX89: :[[@LINE-3]]:24: error: invalid operand (violates constant bus restrictions)
+// GFX11: v_div_fmas_f32 v0, v0, src_scc, v1 ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xfb,0x05,0x04]
+// NOSICIVI: :[[@LINE-1]]:24: error: invalid operand (violates constant bus restrictions)
// v_div_fmas implicitly reads VCC
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
v_div_fmas_f32 v0, v0, v1, vccz
+// NOSICI: :[[@LINE-1]]:28: error: invalid operand (violates constant bus restrictions)
+// NOGFX89: :[[@LINE-2]]:28: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-3]]:28: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:28: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:28: error: src_vccz register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:28: error: invalid operand (violates constant bus restrictions)
// v_addc_co_u32 implicitly reads VCC (VOP2)
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
v_addc_co_u32 v0, vcc, shared_base, v0, vcc
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX9: :[[@LINE-3]]:24: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
v_madak_f32 v0, shared_base, v0, 0x11213141
+// NOSICI: :[[@LINE-1]]:17: error: src_shared_base register not available on this GPU
+// NOVI: :[[@LINE-2]]:17: error: src_shared_base register not available on this GPU
+// NOGFX9: :[[@LINE-3]]:17: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:17: error: src_shared_base register not available on this GPU
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
v_madak_f32 v0, scc, v0, 0x11213141
+// NOSICI: :[[@LINE-1]]:17: error: invalid operand (violates constant bus restrictions)
+// NOGFX89: :[[@LINE-2]]:17: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:17: error: invalid operand (violates constant bus restrictions)
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
v_madak_f32 v0, 0xff32ff, v0, 0x11213141
+// NOSICI: :[[@LINE-1]]:31: error: only one unique literal operand is allowed
+// NOGFX89: :[[@LINE-2]]:31: error: only one unique literal operand is allowed
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:31: error: only one unique literal operand is allowed
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
v_madak_f32 v0, 0xff32ff, v0, 1
+// NOSICI: :[[@LINE-1]]:31: error: only one unique literal operand is allowed
+// NOGFX89: :[[@LINE-2]]:31: error: only one unique literal operand is allowed
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:31: error: only one unique literal operand is allowed
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
v_madmk_f32 v0, 0xff32ff, 0x11213141, v0
+// NOSICI: :[[@LINE-1]]:27: error: only one unique literal operand is allowed
+// NOGFX89: :[[@LINE-2]]:27: error: only one unique literal operand is allowed
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:27: error: only one unique literal operand is allowed
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
v_madmk_f32 v0, 0xff32ff, -1, v0
+// NOSICI: :[[@LINE-1]]:27: error: only one unique literal operand is allowed
+// NOGFX89: :[[@LINE-2]]:27: error: only one unique literal operand is allowed
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:27: error: only one unique literal operand is allowed
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
v_madak_f16 v0, 0xff32, v0, 0x1122
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:29: error: only one unique literal operand is allowed
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
v_madak_f16 v0, 0xff32, v0, 0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:29: error: only one unique literal operand is allowed
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
v_madmk_f16 v0, 0xff32, 0x1122, v0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:25: error: only one unique literal operand is allowed
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
v_madmk_f16 v0, 0xff32, 1, v0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:25: error: only one unique literal operand is allowed
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_base register not available on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
v_cmp_eq_f32 s[0:1], private_base, private_limit
+// NOSICI: :[[@LINE-1]]:22: error: src_private_base register not available on this GPU
+// NOVI: :[[@LINE-2]]:22: error: src_private_base register not available on this GPU
+// NOGFX9: :[[@LINE-3]]:36: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-4]]:14: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-5]]:14: error: invalid operand for instruction
+// NOGFX1250: :[[@LINE-6]]:14: error: invalid operand for instruction
+// NOSICIVI: :[[@LINE-1]]:22: error: src_private_base register not available on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_base register not available on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
v_cmp_eq_f32 s[0:1], private_base, s0
+// NOSICI: :[[@LINE-1]]:22: error: src_private_base register not available on this GPU
+// NOVI: :[[@LINE-2]]:22: error: src_private_base register not available on this GPU
+// NOGFX9: :[[@LINE-3]]:36: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-4]]:14: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-5]]:14: error: invalid operand for instruction
+// NOGFX1250: :[[@LINE-6]]:14: error: invalid operand for instruction
+// NOSICIVI: :[[@LINE-1]]:22: error: src_private_base register not available on this GPU
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
v_cmp_eq_f32 s[0:1], execz, s0
+// NOSICI: :[[@LINE-1]]:29: error: invalid operand (violates constant bus restrictions)
+// NOGFX89: :[[@LINE-2]]:29: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-3]]:22: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:22: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:22: error: src_execz register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:29: error: invalid operand (violates constant bus restrictions)
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
v_pk_add_f16 v255, private_base, private_limit
+// GFX12XX: v_pk_add_f16 v255, src_private_base, src_private_limit ; encoding: [0xff,0x40,0x0f,0xcc,0xed,0xdc,0x01,0x18]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_pk_add_f16 v255, src_private_base, src_private_limit ; encoding: [0xff,0x40,0x0f,0xcc,0xed,0xdc,0x01,0x18]
+// NOVI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX9: :[[@LINE-5]]:34: error: invalid operand (violates constant bus restrictions)
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
v_pk_add_f16 v255, vccz, execz
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX9: :[[@LINE-3]]:26: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-4]]:20: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-5]]:20: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-6]]:20: error: src_vccz register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
//---------------------------------------------------------------------------//
-// check dummy lit() syntax for sp3 compatibility.
+// check lit() syntax.
//---------------------------------------------------------------------------//
-// SICI: v_sqrt_f32_e32 v2, 0x7b ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX89: v_sqrt_f32_e32 v2, 0x7b ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00]
v_sqrt_f32 v2, lit(123)
+// SICI: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX89: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX12XX: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX11: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// SICI: v_sqrt_f32_e32 v2, 0x7b ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX89: v_sqrt_f32_e32 v2, 0x7b ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00]
v_sqrt_f32 v2, abs(lit(123))
+// SICI: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX89: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX12XX: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX11: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// SICI: v_sqrt_f32_e32 v2, 0x42f60000 ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42
-// GFX89: v_sqrt_f32_e32 v2, 0x42f60000 ; encoding: [0xff,0x4e,0x04,0x7e,0x00,0x00,0xf6,0x42]
v_sqrt_f32 v2, lit(123.0)
+// SICI: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
+// GFX89: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x4e,0x04,0x7e,0x00,0x00,0xf6,0x42]
+// GFX12XX: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
+// GFX11: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
-// SICI: v_sqrt_f64_e32 v[2:3], 0x405ec000 ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
-// GFX89: v_sqrt_f64_e32 v[2:3], 0x405ec000 ; encoding: [0xff,0x50,0x04,0x7e,0x00,0xc0,0x5e,0x40]
v_sqrt_f64 v[2:3], lit(123.0)
+// SICI: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
+// GFX89: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x50,0x04,0x7e,0x00,0xc0,0x5e,0x40]
+// GFX11: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
+// GFX12: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
+// GFX1250: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xfe,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40,0x00,0x00,0x00,0x00]
-// SICI: v_sqrt_f64_e32 v[2:3], 0x7b ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX89: v_sqrt_f64_e32 v[2:3], 0x7b ; encoding: [0xff,0x50,0x04,0x7e,0x7b,0x00,0x00,0x00]
v_sqrt_f64 v[2:3], lit(123)
+// SICI: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX89: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x50,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX11: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX12: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX1250: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xfe,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: expected left paren after lit
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: expected left paren after lit
v_sqrt_f32 v2, lit 123.0
+// NOGCN: :[[@LINE-1]]:20: error: expected left paren after lit
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: expected closing parentheses
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: expected closing parentheses
v_sqrt_f32 v2, lit(123.0
+// NOGCN: :[[@LINE-1]]:25: error: expected closing parentheses
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: expected immediate with lit modifier
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: expected immediate with lit modifier
v_sqrt_f32 v2, lit(v1)
+// NOGCN: :[[@LINE-1]]:20: error: expected immediate with lit modifier
// Make sure lit() is accepted on operands without modifiers.
-// SICI: v_madak_f32 v4, 0x7e8, v8, 0x7e8 ; encoding: [0xff,0x10,0x08,0x42,0xe8,0x07,0x00,0x00]
-// GFX89: v_madak_f32 v4, 0x7e8, v8, 0x7e8 ; encoding: [0xff,0x10,0x08,0x30,0xe8,0x07,0x00,0x00]
v_madak_f32 v4, lit(0x7e8), v8, lit(0x7e8)
+// SICI: v_madak_f32 v4, lit(0x7e8), v8, lit(0x7e8) ; encoding: [0xff,0x10,0x08,0x42,0xe8,0x07,0x00,0x00]
+// GFX89: v_madak_f32 v4, lit(0x7e8), v8, lit(0x7e8) ; encoding: [0xff,0x10,0x08,0x30,0xe8,0x07,0x00,0x00]
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: not a valid operand.
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand.
v_madak_f32 v4, lit(lit(0x7e8)), v8, lit(0x7e8)
+// NOSICI: :[[@LINE-1]]:24: error: not a valid operand.
+// NOGFX89: :[[@LINE-2]]:24: error: not a valid operand.
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:24: error: not a valid operand.
diff --git a/llvm/test/MC/AMDGPU/vop3-literal.s b/llvm/test/MC/AMDGPU/vop3-literal.s
index 56e71b9..dd6be544 100644
--- a/llvm/test/MC/AMDGPU/vop3-literal.s
+++ b/llvm/test/MC/AMDGPU/vop3-literal.s
@@ -3,6 +3,7 @@
// RUN: not llvm-mc -triple=amdgcn %s -show-encoding -mcpu=gfx900 | FileCheck %s -check-prefix=GFX9
// RUN: not llvm-mc -triple=amdgcn %s -show-encoding -mcpu=gfx1010 -mattr=+wavefrontsize64 | FileCheck %s -check-prefix=GFX10
// RUN: not llvm-mc -triple=amdgcn %s -show-encoding -mcpu=gfx1250 -mattr=+wavefrontsize64 | FileCheck %s -check-prefix=GFX1250
+// RUN: not llvm-mc -triple=amdgcn %s -show-encoding -mcpu=gfx1250 -mattr=+wavefrontsize64 | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize64 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn %s -filetype=null -no-warn 2>&1 -mcpu=gfx900 | FileCheck %s -implicit-check-not=error: -check-prefix=GFX9-ERR
// RUN: not llvm-mc -triple=amdgcn %s -filetype=null -no-warn 2>&1 -mcpu=gfx1010 -mattr=+wavefrontsize64 | FileCheck %s -implicit-check-not=error: -check-prefix=GFX10-ERR
@@ -185,7 +186,7 @@ v_add_f16_e64 v0, 0xfe0b, neg(0xfe0b)
v_add_f64 v[0:1], 1.23456, v[0:1]
// GFX10: v_add_f64 v[0:1], 0x3ff3c0c1, v[0:1] ; encoding: [0x00,0x00,0x64,0xd5,0xff,0x00,0x02,0x00,0xc1,0xc0,0xf3,0x3f]
-// GFX1250: v_add_f64_e32 v[0:1], lit64(0x3ff3c0c1fc8f3238), v[0:1] ; encoding: [0xfe,0x00,0x00,0x04,0x38,0x32,0x8f,0xfc,0xc1,0xc0,0xf3,0x3f]
+// GFX1250: v_add_f64_e32 v[0:1], 0x3ff3c0c1fc8f3238, v[0:1] ; encoding: [0xfe,0x00,0x00,0x04,0x38,0x32,0x8f,0xfc,0xc1,0xc0,0xf3,0x3f]
// GFX9-ERR: :[[@LINE-3]]:19: error: literal operands are not supported
v_add_f64 v[0:1], v[0:1], -abs(1.23456)
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_salu_lit64.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_salu_lit64.txt
index 7064479..d44400e 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_salu_lit64.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_salu_lit64.txt
@@ -2,55 +2,55 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
0xfe,0x01,0x82,0xbe,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_mov_b64 s[2:3], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x01,0x82,0xbe,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_mov_b64 s[2:3], 0x10abcdef12345678 ; encoding: [0xfe,0x01,0x82,0xbe,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0x04,0xfe,0x82,0xa9,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_add_nc_u64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0xa9,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_add_nc_u64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0xa9,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x04,0x82,0x8b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_and_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x8b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_and_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x8b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfe,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_and_not1_b64 s[2:3], lit64(0x10abcdef12345678), lit64(0x10abcdef12345678) ; encoding: [0xfe,0xfe,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_and_not1_b64 s[2:3], 0x10abcdef12345678, 0x10abcdef12345678 ; encoding: [0xfe,0xfe,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x04,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_and_not1_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_and_not1_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x04,0x82,0x86,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_ashr_i64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x86,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_ashr_i64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x86,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x85,0x82,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x80
-# GFX1250: s_bfe_i64 s[2:3], lit64(0x80abcdef12345678), 5 ; encoding: [0xfe,0x85,0x82,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x80]
+# GFX1250: s_bfe_i64 s[2:3], 0x80abcdef12345678, 5 ; encoding: [0xfe,0x85,0x82,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x80]
0xfe,0x85,0x02,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_bfe_u64 s[2:3], lit64(0x10abcdef12345678), 5 ; encoding: [0xfe,0x85,0x02,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_bfe_u64 s[2:3], 0x10abcdef12345678, 5 ; encoding: [0xfe,0x85,0x02,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0x04,0xfe,0x82,0x98,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_cselect_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x98,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_cselect_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x98,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x04,0x82,0x84,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_lshl_b64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x84,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_lshl_b64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x84,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x04,0x82,0x85,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_lshr_b64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x85,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_lshr_b64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x85,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x04,0x82,0xaa,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_mul_u64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0xaa,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_mul_u64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0xaa,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0x04,0xfe,0x82,0x8e,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_nand_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8e,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_nand_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8e,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0x04,0xfe,0x82,0x8f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_nor_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_nor_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0x04,0xfe,0x82,0x8c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_or_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_or_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_or_not1_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_or_not1_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0x04,0xfe,0x82,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_xnor_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_xnor_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x04,0x82,0x8d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_xor_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x8d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_xor_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x8d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt
index 227e1c4..34a4646 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt
@@ -2,7 +2,7 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
0xfe,0x4b,0x80,0xbe,0xd0,0xbc,0x8a,0x67,0x45,0x23,0x01,0x00
-# GFX1250: s_add_pc_i64 lit64(0x12345678abcd0) ; encoding: [0xfe,0x4b,0x80,0xbe,0xd0,0xbc,0x8a,0x67,0x45,0x23,0x01,0x00]
+# GFX1250: s_add_pc_i64 0x12345678abcd0 ; encoding: [0xfe,0x4b,0x80,0xbe,0xd0,0xbc,0x8a,0x67,0x45,0x23,0x01,0x00]
0xff,0x4b,0x80,0xbe,0x64,0x00,0x00,0x00
# GFX1250: s_add_pc_i64 0x64 ; encoding: [0xff,0x4b,0x80,0xbe,0x64,0x00,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_valu_lit64.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_valu_lit64.txt
index 1571fb9..cce6a74 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_valu_lit64.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_valu_lit64.txt
@@ -2,211 +2,211 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
0xfe,0xfc,0xfd,0x05,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_add_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x05,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_add_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x05,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x30,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x30,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_ceil_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x30,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfe,0xff,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_class_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v255 ; encoding: [0xfe,0xfe,0xff,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_class_f64_e32 vcc_lo, 0x10abcdef12345678, v255 ; encoding: [0xfe,0xfe,0xff,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x45,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_eq_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_eq_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x4d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_ge_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_ge_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x49,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_gt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_gt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xa9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_gt_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_gt_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xb9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_gt_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_gt_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x47,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_le_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_le_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xa7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_le_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_le_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xb7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_le_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_le_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x4b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_lg_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_lg_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x43,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_lt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_lt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xa3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_lt_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_lt_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xb3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_lt_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_lt_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xab,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_ne_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_ne_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xbb,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_ne_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_ne_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x5b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_neq_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_neq_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x53,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_nge_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_nge_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x57,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_ngt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_ngt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x59,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_nle_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_nle_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x55,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_nlg_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_nlg_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x5d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_nlt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_nlt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x4f,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_o_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_o_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x51,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_u_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_u_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfe,0xff,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_class_f64_e32 lit64(0x10abcdef12345678), v255 ; encoding: [0xfe,0xfe,0xff,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_class_f64_e32 0x10abcdef12345678, v255 ; encoding: [0xfe,0xfe,0xff,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x45,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_eq_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_eq_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xa5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_eq_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_eq_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xb5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_eq_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_eq_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x4d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_ge_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_ge_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xad,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_ge_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xad,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_ge_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xad,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xbd,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_ge_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbd,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_ge_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbd,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x49,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_gt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_gt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xa9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_gt_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_gt_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xb9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_gt_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_gt_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x47,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_le_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_le_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xa7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_le_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_le_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xb7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_le_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_le_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x4b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_lg_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_lg_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x43,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_lt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_lt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xa3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_lt_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_lt_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xb3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_lt_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_lt_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xab,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_ne_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_ne_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xbb,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_ne_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_ne_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x5b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_neq_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_neq_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x53,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_nge_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_nge_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x57,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_ngt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_ngt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x59,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_nle_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_nle_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x55,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_nlg_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_nlg_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x5d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_nlt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_nlt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x4f,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_o_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_o_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0x51,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_u_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_u_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x1e,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cvt_f32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x1e,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cvt_f32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x1e,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x06,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cvt_i32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x06,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cvt_i32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x06,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x2a,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cvt_u32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x2a,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cvt_u32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x2a,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x34,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_floor_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x34,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_floor_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x34,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x7c,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_fract_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x7c,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_fract_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x7c,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x78,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_frexp_exp_i32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x78,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_frexp_exp_i32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x78,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x7a,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_frexp_mant_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x7a,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_frexp_mant_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x7a,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xfd,0x1d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_max_num_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_max_num_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xfd,0x1b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_min_num_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_min_num_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0xfc,0xfd,0x0d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_mul_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x0d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_mul_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x0d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x5e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_rcp_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x5e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_rcp_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x5e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x32,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_rndne_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x32,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_rndne_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x32,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x62,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_rsq_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x62,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_rsq_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x62,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x68,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_sqrt_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x68,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_sqrt_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x68,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x2e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_trunc_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x2e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_trunc_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x2e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
0xfe,0x30,0xfc,0x7f,0x33,0x33,0x33,0x33,0x33,0x23,0x63,0x40
-# GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x4063233333333333) ; encoding: [0xfe,0x30,0xfc,0x7f,0x33,0x33,0x33,0x33,0x33,0x23,0x63,0x40]
+# GFX1250: v_ceil_f64_e32 v[254:255], 0x4063233333333333 ; encoding: [0xfe,0x30,0xfc,0x7f,0x33,0x33,0x33,0x33,0x33,0x23,0x63,0x40]
0xfe,0x30,0xfc,0x7f,0x5b,0xc0,0x74,0x89,0x36,0x69,0x89,0x44
-# GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x448969368974c05b) ; encoding: [0xfe,0x30,0xfc,0x7f,0x5b,0xc0,0x74,0x89,0x36,0x69,0x89,0x44]
+# GFX1250: v_ceil_f64_e32 v[254:255], 0x448969368974c05b ; encoding: [0xfe,0x30,0xfc,0x7f,0x5b,0xc0,0x74,0x89,0x36,0x69,0x89,0x44]
0xff,0x30,0xfc,0x7f,0x00,0x20,0x63,0x40
# GFX1250: v_ceil_f64_e32 v[254:255], 0x40632000 ; encoding: [0xff,0x30,0xfc,0x7f,0x00,0x20,0x63,0x40]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
index 94edf22..acf7ded 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
@@ -3,7 +3,7 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
0xff,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf
-# GFX1250: v_mov_b64_e32 v[254:255], lit64(0xaf123456) ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: v_mov_b64_e32 v[254:255], 0xaf123456 ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x3a,0x08,0x7e
# GFX1250: v_mov_b64_e32 v[4:5], -1 ; encoding: [0xc1,0x3a,0x08,0x7e]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt
index 130941c..58ac4e9 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt
@@ -146,7 +146,7 @@
# GFX1250: v_add_nc_u64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x50,0x73,0x72,0x71,0x3f]
0xff,0x08,0x08,0x50,0x56,0x34,0x12,0xaf
-# GFX1250: v_add_nc_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x50,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: v_add_nc_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x50,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x7e,0x08,0x08,0x50
# GFX1250: v_add_nc_u64_e32 v[4:5], exec, v[4:5] ; encoding: [0x7e,0x08,0x08,0x50]
@@ -233,7 +233,7 @@
# GFX1250: v_sub_nc_u64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x52,0x73,0x72,0x71,0x3f]
0xff,0x08,0x08,0x52,0x56,0x34,0x12,0xaf
-# GFX1250: v_sub_nc_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x52,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: v_sub_nc_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x52,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x7e,0x08,0x08,0x52
# GFX1250: v_sub_nc_u64_e32 v[4:5], exec, v[4:5] ; encoding: [0x7e,0x08,0x08,0x52]
@@ -320,7 +320,7 @@
# GFX1250: v_mul_u64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x54,0x73,0x72,0x71,0x3f]
0xff,0x08,0x08,0x54,0x56,0x34,0x12,0xaf
-# GFX1250: v_mul_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x54,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: v_mul_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x54,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x7e,0x08,0x08,0x54
# GFX1250: v_mul_u64_e32 v[4:5], exec, v[4:5] ; encoding: [0x7e,0x08,0x08,0x54]
@@ -377,13 +377,13 @@
# GFX1250: v_fmaak_f64 v[254:255], 0x405ec000, v[2:3], 0x405ec000 ; encoding: [0xfe,0x04,0xfc,0x49,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40
-# GFX1250: v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit64(0x405ec00012345678) ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+# GFX1250: v_fmaak_f64 v[254:255], 0x405ec00012345678, v[254:255], 0x405ec00012345678 ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40
-# GFX1250: v_fmaak_f64 v[4:5], lit64(0x405ec66666666666), v[6:7], lit64(0x405ec66666666666) ; encoding: [0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+# GFX1250: v_fmaak_f64 v[4:5], 0x405ec66666666666, v[6:7], 0x405ec66666666666 ; encoding: [0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40
-# GFX1250: v_fmaak_f64 v[4:5], lit64(0x405ec66666666666), v[8:9], lit64(0x405ec66666666666) ; encoding: [0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+# GFX1250: v_fmaak_f64 v[4:5], 0x405ec66666666666, v[8:9], 0x405ec66666666666 ; encoding: [0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
0xf2,0x10,0x08,0x48,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f
# GFX1250: v_fmaak_f64 v[4:5], 1.0, v[8:9], 0x3ff00000 ; encoding: [0xf2,0x10,0x08,0x48,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f]
@@ -395,7 +395,7 @@
# GFX1250: v_fmaak_f64 v[4:5], lit64(0x7e8), v[8:9], lit64(0x7e8) ; encoding: [0xfe,0x10,0x08,0x48,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00]
0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40
-# GFX1250: v_fmaak_f64 v[4:5], v[2:3], v[2:3], lit64(0x405ec66666666666) ; encoding: [0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+# GFX1250: v_fmaak_f64 v[4:5], v[2:3], v[2:3], 0x405ec66666666666 ; encoding: [0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
0xc1,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40
# GFX1250: v_fmaak_f64 v[6:7], -1, v[8:9], 0x405ec000 ; encoding: [0xc1,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
@@ -410,7 +410,7 @@
# GFX1250: v_fmaak_f64 v[6:7], null, v[8:9], 0x405ec000 ; encoding: [0x7c,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40
-# GFX1250: v_fmaak_f64 v[6:7], s[2:3], v[8:9], lit64(0x405ec00012345678) ; encoding: [0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+# GFX1250: v_fmaak_f64 v[6:7], s[2:3], v[8:9], 0x405ec00012345678 ; encoding: [0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
0xfd,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40
# GFX1250: v_fmaak_f64 v[6:7], src_scc, v[8:9], 0x405ec000 ; encoding: [0xfd,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
@@ -431,13 +431,13 @@
# GFX1250: v_fmamk_f64 v[254:255], 0x405ec000, 0x405ec000, v[2:3] ; encoding: [0xfe,0x04,0xfc,0x47,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40
-# GFX1250: v_fmamk_f64 v[254:255], lit64(0x405ec00012345678), lit64(0x405ec00012345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+# GFX1250: v_fmamk_f64 v[254:255], 0x405ec00012345678, 0x405ec00012345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40
-# GFX1250: v_fmamk_f64 v[4:5], lit64(0x405ec66666666666), lit64(0x405ec66666666666), v[6:7] ; encoding: [0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+# GFX1250: v_fmamk_f64 v[4:5], 0x405ec66666666666, 0x405ec66666666666, v[6:7] ; encoding: [0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40
-# GFX1250: v_fmamk_f64 v[4:5], lit64(0x405ec66666666666), lit64(0x405ec66666666666), v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+# GFX1250: v_fmamk_f64 v[4:5], 0x405ec66666666666, 0x405ec66666666666, v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
0xf2,0x0c,0x08,0x46,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f
# GFX1250: v_fmamk_f64 v[4:5], 1.0, 0x3ff00000, v[6:7] ; encoding: [0xf2,0x0c,0x08,0x46,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f]
@@ -449,7 +449,7 @@
# GFX1250: v_fmamk_f64 v[4:5], lit64(0x7e8), lit64(0x7e8), v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00]
0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40
-# GFX1250: v_fmamk_f64 v[4:5], v[2:3], lit64(0x405ec66666666666), v[6:7] ; encoding: [0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+# GFX1250: v_fmamk_f64 v[4:5], v[2:3], 0x405ec66666666666, v[6:7] ; encoding: [0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
0xc1,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40
# GFX1250: v_fmamk_f64 v[6:7], -1, 0x405ec000, v[2:3] ; encoding: [0xc1,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
@@ -464,7 +464,7 @@
# GFX1250: v_fmamk_f64 v[6:7], null, 0x405ec000, v[2:3] ; encoding: [0x7c,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40
-# GFX1250: v_fmamk_f64 v[6:7], s[2:3], lit64(0x405ec00012345678), v[2:3] ; encoding: [0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+# GFX1250: v_fmamk_f64 v[6:7], s[2:3], 0x405ec00012345678, v[2:3] ; encoding: [0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
0xfd,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40
# GFX1250: v_fmamk_f64 v[6:7], src_scc, 0x405ec000, v[2:3] ; encoding: [0xfd,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
index c88fbc2..06ef877 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
@@ -868,7 +868,7 @@
0xff,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_and_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_not0_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x2d,0x80,0xbe
# GFX12: s_and_not0_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x2d,0x80,0xbe]
@@ -959,7 +959,7 @@
0xff,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_and_not0_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_not0_wrexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_not0_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x35,0x80,0xbe
# GFX12: s_and_not0_wrexec_b64 s[0:1], -1 ; encoding: [0xc1,0x35,0x80,0xbe]
@@ -1050,7 +1050,7 @@
0xff,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_and_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_not1_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x31,0x80,0xbe
# GFX12: s_and_not1_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x31,0x80,0xbe]
@@ -1141,7 +1141,7 @@
0xff,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_and_not1_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_not1_wrexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_not1_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x37,0x80,0xbe
# GFX12: s_and_not1_wrexec_b64 s[0:1], -1 ; encoding: [0xc1,0x37,0x80,0xbe]
@@ -1232,7 +1232,7 @@
0xff,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_and_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x21,0x80,0xbe
# GFX12: s_and_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x21,0x80,0xbe]
@@ -1341,7 +1341,7 @@
0xff,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_bcnt0_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_bcnt0_i32_b64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_bcnt0_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x17,0x80,0xbe
# GFX12: s_bcnt0_i32_b64 s0, -1 ; encoding: [0xc1,0x17,0x80,0xbe]
@@ -1453,7 +1453,7 @@
0xff,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_bcnt1_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_bcnt1_i32_b64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_bcnt1_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x19,0x80,0xbe
# GFX12: s_bcnt1_i32_b64 s0, -1 ; encoding: [0xc1,0x19,0x80,0xbe]
@@ -1832,7 +1832,7 @@
0xff,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_brev_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_brev_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_brev_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x05,0x80,0xbe
# GFX12: s_brev_b64 s[0:1], -1 ; encoding: [0xc1,0x05,0x80,0xbe]
@@ -1887,7 +1887,7 @@
0xff,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_cls_i32_i64 s0, 0xaf123456 ; encoding: [0xff,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_cls_i32_i64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_cls_i32_i64 s0, 0xaf123456 ; encoding: [0xfe,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x0d,0x80,0xbe
# GFX12: s_cls_i32_i64 s0, -1 ; encoding: [0xc1,0x0d,0x80,0xbe]
@@ -2053,7 +2053,7 @@
0xff,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_clz_i32_u64 s0, 0xaf123456 ; encoding: [0xff,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_clz_i32_u64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_clz_i32_u64 s0, 0xaf123456 ; encoding: [0xfe,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x0b,0x80,0xbe
# GFX12: s_clz_i32_u64 s0, -1 ; encoding: [0xc1,0x0b,0x80,0xbe]
@@ -2159,7 +2159,7 @@
0xff,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_cmov_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_cmov_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_cmov_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x03,0x80,0xbe
# GFX12: s_cmov_b64 s[0:1], -1 ; encoding: [0xc1,0x03,0x80,0xbe]
@@ -2268,7 +2268,7 @@
0xff,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_ctz_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_ctz_i32_b64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_ctz_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x09,0x80,0xbe
# GFX12: s_ctz_i32_b64 s0, -1 ; encoding: [0xc1,0x09,0x80,0xbe]
@@ -2396,7 +2396,7 @@
0xff,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_mov_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_mov_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_mov_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x01,0x80,0xbe
# GFX12: s_mov_b64 s[0:1], -1 ; encoding: [0xc1,0x01,0x80,0xbe]
@@ -2493,7 +2493,7 @@
0xff,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_movreld_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_movreld_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_movreld_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x43,0x80,0xbe
# GFX12: s_movreld_b64 s[0:1], -1 ; encoding: [0xc1,0x43,0x80,0xbe]
@@ -2662,7 +2662,7 @@
0xff,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_nand_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_nand_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_nand_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x27,0x80,0xbe
# GFX12: s_nand_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x27,0x80,0xbe]
@@ -2753,7 +2753,7 @@
0xff,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_nor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_nor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_nor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x29,0x80,0xbe
# GFX12: s_nor_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x29,0x80,0xbe]
@@ -2856,7 +2856,7 @@
0xff,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_not_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_not_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_not_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x1f,0x80,0xbe
# GFX12: s_not_b64 s[0:1], -1 ; encoding: [0xc1,0x1f,0x80,0xbe]
@@ -2947,7 +2947,7 @@
0xff,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_or_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_or_not0_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_or_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x2f,0x80,0xbe
# GFX12: s_or_not0_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x2f,0x80,0xbe]
@@ -3038,7 +3038,7 @@
0xff,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_or_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_or_not1_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_or_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x33,0x80,0xbe
# GFX12: s_or_not1_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x33,0x80,0xbe]
@@ -3129,7 +3129,7 @@
0xff,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_or_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_or_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_or_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x23,0x80,0xbe
# GFX12: s_or_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x23,0x80,0xbe]
@@ -3232,7 +3232,7 @@
0xff,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_quadmask_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_quadmask_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_quadmask_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x1b,0x80,0xbe
# GFX12: s_quadmask_b64 s[0:1], -1 ; encoding: [0xc1,0x1b,0x80,0xbe]
@@ -3549,7 +3549,7 @@
0xff,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_wqm_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_wqm_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_wqm_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x1d,0x80,0xbe
# GFX12: s_wqm_b64 s[0:1], -1 ; encoding: [0xc1,0x1d,0x80,0xbe]
@@ -3640,7 +3640,7 @@
0xff,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_xnor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_xnor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_xnor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x2b,0x80,0xbe
# GFX12: s_xnor_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x2b,0x80,0xbe]
@@ -3731,7 +3731,7 @@
0xff,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf
# GFX1200: s_xor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_xor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_xor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x25,0x80,0xbe
# GFX12: s_xor_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x25,0x80,0xbe]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt
index d889931..47b7408 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt
@@ -56,7 +56,7 @@
0xff,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf
# GFX1200: s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf]
-# GFX1250: s_add_nc_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x02,0x7e,0x80,0xa9
# GFX12: s_add_nc_u64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0xa9]
@@ -81,7 +81,7 @@
0x02,0xff,0x80,0xa9,0x56,0x34,0x12,0xaf
# GFX1200: s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0xa9,0x56,0x34,0x12,0xaf]
-# GFX1250: s_add_nc_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x02,0x04,0x00,0xaa
# GFX12: s_sub_nc_u64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x00,0xaa]
@@ -136,7 +136,7 @@
0xff,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf
# GFX1200: s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf]
-# GFX1250: s_sub_nc_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x02,0x7e,0x00,0xaa
# GFX12: s_sub_nc_u64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x00,0xaa]
@@ -161,7 +161,7 @@
0x02,0xff,0x00,0xaa,0x56,0x34,0x12,0xaf
# GFX1200: s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x00,0xaa,0x56,0x34,0x12,0xaf]
-# GFX1250: s_sub_nc_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x02,0x04,0x80,0xaa
# GFX12: s_mul_u64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0xaa]
@@ -216,7 +216,7 @@
0xff,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf
# GFX1200: s_mul_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf]
-# GFX1250: s_mul_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_mul_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x02,0x7e,0x80,0xaa
# GFX12: s_mul_u64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0xaa]
@@ -241,7 +241,7 @@
0x02,0xff,0x80,0xaa,0x56,0x34,0x12,0xaf
# GFX1200: s_mul_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0xaa,0x56,0x34,0x12,0xaf]
-# GFX1250: s_mul_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_mul_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x01,0x02,0x05,0xa0
# GFX12: s_add_f32 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0xa0]
@@ -1697,7 +1697,7 @@
0xff,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf
# GFX1200: s_and_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x04,0x80,0x8b
# GFX12: s_and_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x8b]
@@ -1725,7 +1725,7 @@
0x02,0xff,0x80,0x8b,0x56,0x34,0x12,0xaf
# GFX1200: s_and_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8b,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x02,0xc1,0x80,0x8b
# GFX12: s_and_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x8b]
@@ -1882,7 +1882,7 @@
0xff,0x04,0x80,0x91,0x56,0x34,0x12,0xaf
# GFX1200: s_and_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x91,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_not1_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x04,0x80,0x91
# GFX12: s_and_not1_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x91]
@@ -1910,7 +1910,7 @@
0x02,0xff,0x80,0x91,0x56,0x34,0x12,0xaf
# GFX1200: s_and_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x91,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_not1_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x02,0xc1,0x80,0x91
# GFX12: s_and_not1_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x91]
@@ -2067,7 +2067,7 @@
0xff,0x04,0x80,0x86,0x56,0x34,0x12,0xaf
# GFX1200: s_ashr_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x86,0x56,0x34,0x12,0xaf]
-# GFX1250: s_ashr_i64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x86,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_ashr_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x86,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x04,0x80,0x86
# GFX12: s_ashr_i64 s[0:1], -1, s4 ; encoding: [0xc1,0x04,0x80,0x86]
@@ -2251,7 +2251,7 @@
0xff,0x04,0x80,0x94,0x56,0x34,0x12,0xaf
# GFX1200: s_bfe_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x94,0x56,0x34,0x12,0xaf]
-# GFX1250: s_bfe_i64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_bfe_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x04,0x80,0x94
# GFX12: s_bfe_i64 s[0:1], -1, s4 ; encoding: [0xc1,0x04,0x80,0x94]
@@ -2435,7 +2435,7 @@
0xff,0x04,0x00,0x94,0x56,0x34,0x12,0xaf
# GFX1200: s_bfe_u64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x00,0x94,0x56,0x34,0x12,0xaf]
-# GFX1250: s_bfe_u64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x00,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_bfe_u64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x00,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x04,0x00,0x94
# GFX12: s_bfe_u64 s[0:1], -1, s4 ; encoding: [0xc1,0x04,0x00,0x94]
@@ -2820,7 +2820,7 @@
0xff,0x04,0x80,0x98,0x56,0x34,0x12,0xaf
# GFX1200: s_cselect_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x98,0x56,0x34,0x12,0xaf]
-# GFX1250: s_cselect_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_cselect_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x04,0x80,0x98
# GFX12: s_cselect_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x98]
@@ -2848,7 +2848,7 @@
0x02,0xff,0x80,0x98,0x56,0x34,0x12,0xaf
# GFX1200: s_cselect_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x98,0x56,0x34,0x12,0xaf]
-# GFX1250: s_cselect_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_cselect_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x02,0xc1,0x80,0x98
# GFX12: s_cselect_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x98]
@@ -3425,7 +3425,7 @@
0xff,0x04,0x80,0x84,0x56,0x34,0x12,0xaf
# GFX1200: s_lshl_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x84,0x56,0x34,0x12,0xaf]
-# GFX1250: s_lshl_b64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x84,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_lshl_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x84,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x04,0x80,0x84
# GFX12: s_lshl_b64 s[0:1], -1, s4 ; encoding: [0xc1,0x04,0x80,0x84]
@@ -3609,7 +3609,7 @@
0xff,0x04,0x80,0x85,0x56,0x34,0x12,0xaf
# GFX1200: s_lshr_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x85,0x56,0x34,0x12,0xaf]
-# GFX1250: s_lshr_b64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x85,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_lshr_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x85,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x04,0x80,0x85
# GFX12: s_lshr_b64 s[0:1], -1, s4 ; encoding: [0xc1,0x04,0x80,0x85]
@@ -4528,7 +4528,7 @@
0xff,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf
# GFX1200: s_nand_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf]
-# GFX1250: s_nand_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_nand_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x04,0x80,0x8e
# GFX12: s_nand_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x8e]
@@ -4556,7 +4556,7 @@
0x02,0xff,0x80,0x8e,0x56,0x34,0x12,0xaf
# GFX1200: s_nand_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8e,0x56,0x34,0x12,0xaf]
-# GFX1250: s_nand_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_nand_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x02,0xc1,0x80,0x8e
# GFX12: s_nand_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x8e]
@@ -4713,7 +4713,7 @@
0xff,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf
# GFX1200: s_nor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf]
-# GFX1250: s_nor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_nor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x04,0x80,0x8f
# GFX12: s_nor_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x8f]
@@ -4741,7 +4741,7 @@
0x02,0xff,0x80,0x8f,0x56,0x34,0x12,0xaf
# GFX1200: s_nor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8f,0x56,0x34,0x12,0xaf]
-# GFX1250: s_nor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_nor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x02,0xc1,0x80,0x8f
# GFX12: s_nor_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x8f]
@@ -4898,7 +4898,7 @@
0xff,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf
# GFX1200: s_or_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf]
-# GFX1250: s_or_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_or_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x04,0x80,0x8c
# GFX12: s_or_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x8c]
@@ -4926,7 +4926,7 @@
0x02,0xff,0x80,0x8c,0x56,0x34,0x12,0xaf
# GFX1200: s_or_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8c,0x56,0x34,0x12,0xaf]
-# GFX1250: s_or_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_or_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x02,0xc1,0x80,0x8c
# GFX12: s_or_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x8c]
@@ -5083,7 +5083,7 @@
0xff,0x04,0x80,0x92,0x56,0x34,0x12,0xaf
# GFX1200: s_or_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x92,0x56,0x34,0x12,0xaf]
-# GFX1250: s_or_not1_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_or_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x04,0x80,0x92
# GFX12: s_or_not1_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x92]
@@ -5111,7 +5111,7 @@
0x02,0xff,0x80,0x92,0x56,0x34,0x12,0xaf
# GFX1200: s_or_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x92,0x56,0x34,0x12,0xaf]
-# GFX1250: s_or_not1_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_or_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x02,0xc1,0x80,0x92
# GFX12: s_or_not1_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x92]
@@ -5898,7 +5898,7 @@
0xff,0x04,0x80,0x90,0x56,0x34,0x12,0xaf
# GFX1200: s_xnor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x90,0x56,0x34,0x12,0xaf]
-# GFX1250: s_xnor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_xnor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x04,0x80,0x90
# GFX12: s_xnor_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x90]
@@ -5926,7 +5926,7 @@
0x02,0xff,0x80,0x90,0x56,0x34,0x12,0xaf
# GFX1200: s_xnor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x90,0x56,0x34,0x12,0xaf]
-# GFX1250: s_xnor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_xnor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x02,0xc1,0x80,0x90
# GFX12: s_xnor_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x90]
@@ -6083,7 +6083,7 @@
0xff,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf
# GFX1200: s_xor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf]
-# GFX1250: s_xor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_xor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0xc1,0x04,0x80,0x8d
# GFX12: s_xor_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x8d]
@@ -6111,7 +6111,7 @@
0x02,0xff,0x80,0x8d,0x56,0x34,0x12,0xaf
# GFX1200: s_xor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8d,0x56,0x34,0x12,0xaf]
-# GFX1250: s_xor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_xor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x02,0xc1,0x80,0x8d
# GFX12: s_xor_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x8d]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopc.txt
index a8da16f..9355582 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopc.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopc.txt
@@ -1492,7 +1492,7 @@
0x00,0xff,0x10,0xbf,0x56,0x34,0x12,0xaf
# GFX1200: s_cmp_eq_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xff,0x10,0xbf,0x56,0x34,0x12,0xaf]
-# GFX1250: s_cmp_eq_u64 s[0:1], lit64(0xaf123456) ; encoding: [0x00,0xfe,0x10,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_cmp_eq_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xfe,0x10,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x00,0xc1,0x10,0xbf
# GFX12: s_cmp_eq_u64 s[0:1], -1 ; encoding: [0x00,0xc1,0x10,0xbf]
@@ -2015,7 +2015,7 @@
0x00,0xff,0x11,0xbf,0x56,0x34,0x12,0xaf
# GFX1200: s_cmp_lg_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xff,0x11,0xbf,0x56,0x34,0x12,0xaf]
-# GFX1250: s_cmp_lg_u64 s[0:1], lit64(0xaf123456) ; encoding: [0x00,0xfe,0x11,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_cmp_lg_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xfe,0x11,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
0x00,0xc1,0x11,0xbf
# GFX12: s_cmp_lg_u64 s[0:1], -1 ; encoding: [0x00,0xc1,0x11,0xbf]
diff --git a/llvm/test/Other/new-pm-print-pipeline.ll b/llvm/test/Other/new-pm-print-pipeline.ll
index 6fa57f1..3536932 100644
--- a/llvm/test/Other/new-pm-print-pipeline.ll
+++ b/llvm/test/Other/new-pm-print-pipeline.ll
@@ -50,7 +50,7 @@
; CHECK-17: function(print<stack-lifetime><may>,print<stack-lifetime><must>)
; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
-; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)
+; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;no-switch-to-arithmetic;switch-to-lookup;keep-loops;hoist-common-insts;hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-arithmetic;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)
; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only>,loop-vectorize<interleave-forced-only;vectorize-forced-only>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-19
; CHECK-19: function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,loop-vectorize<interleave-forced-only;vectorize-forced-only;>)
diff --git a/llvm/test/Transforms/GVN/ptrtoaddr.ll b/llvm/test/Transforms/GVN/ptrtoaddr.ll
new file mode 100644
index 0000000..6d02bc6
--- /dev/null
+++ b/llvm/test/Transforms/GVN/ptrtoaddr.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=gvn < %s | FileCheck %s
+
+define i64 @ptrtoaddr_same(ptr %p) {
+; CHECK-LABEL: define i64 @ptrtoaddr_same(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[J:%.*]] = ptrtoaddr ptr [[P]] to i64
+; CHECK-NEXT: ret i64 0
+;
+ %i = ptrtoaddr ptr %p to i64
+ %j = ptrtoaddr ptr %p to i64
+ %sub = sub i64 %i, %j
+ ret i64 %sub
+}
+
+; Note that unlike for ptrtoint, it's not possible for ptrtoaddr to differ
+; in result type for the same input.
+define i64 @ptrtoaddr_different(ptr %p, ptr %p2) {
+; CHECK-LABEL: define i64 @ptrtoaddr_different(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = ptrtoaddr ptr [[P]] to i64
+; CHECK-NEXT: [[J:%.*]] = ptrtoaddr ptr [[P2]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[I]], [[J]]
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %i = ptrtoaddr ptr %p to i64
+ %j = ptrtoaddr ptr %p2 to i64
+ %sub = sub i64 %i, %j
+ ret i64 %sub
+}
diff --git a/llvm/test/Transforms/InstCombine/fold-selective-shift.ll b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll
new file mode 100644
index 0000000..2b22965
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll
@@ -0,0 +1,323 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=instcombine %s -S | FileCheck %s
+
+declare void @clobber.i32(i32)
+
+define i16 @selective_shift_16(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: ret i16 [[SEL_V]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %upper.shl, %lower.zext
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+define i16 @selective_shift_16.commute(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.commute(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: ret i16 [[SEL_V]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %lower.zext, %upper.shl
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+define i16 @selective_shift_16.range(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.range(
+; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.shl = shl nuw i32 %upper, 16
+ %pack = or disjoint i32 %upper.shl, %lower
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+define i16 @selective_shift_16.range.commute(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.range.commute(
+; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.shl = shl nuw i32 %upper, 16
+ %pack = or disjoint i32 %lower, %upper.shl
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+define i32 @selective_shift_16.masked(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i32 @selective_shift_16.masked(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: [[SEL:%.*]] = zext i16 [[SEL_V]] to i32
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %lower.zext, %upper.shl
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %sel.masked = and i32 %sel, 65535
+ ret i32 %sel.masked
+}
+
+define i32 @selective_shift_16.masked.commute(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i32 @selective_shift_16.masked.commute(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: [[SEL:%.*]] = zext i16 [[SEL_V]] to i32
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %upper.shl, %lower.zext
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %sel.masked = and i32 %sel, 65535
+ ret i32 %sel.masked
+}
+
+define <2 x i16> @selective_shift.v16(<2 x i32> %mask, <2 x i16> %upper, <2 x i16> %lower) {
+; CHECK-LABEL: define <2 x i16> @selective_shift.v16(
+; CHECK-SAME: <2 x i32> [[MASK:%.*]], <2 x i16> [[UPPER:%.*]], <2 x i16> [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and <2 x i32> [[MASK]], splat (i32 16)
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq <2 x i32> [[MASK_BIT]], zeroinitializer
+; CHECK-NEXT: [[SEL_V:%.*]] = select <2 x i1> [[MASK_BIT_Z]], <2 x i16> [[LOWER]], <2 x i16> [[UPPER]]
+; CHECK-NEXT: ret <2 x i16> [[SEL_V]]
+;
+ %upper.zext = zext <2 x i16> %upper to <2 x i32>
+ %upper.shl = shl nuw <2 x i32> %upper.zext, splat(i32 16)
+ %lower.zext = zext <2 x i16> %lower to <2 x i32>
+ %pack = or disjoint <2 x i32> %upper.shl, %lower.zext
+ %mask.bit = and <2 x i32> %mask, splat(i32 16)
+ %sel = lshr <2 x i32> %pack, %mask.bit
+ %trunc = trunc <2 x i32> %sel to <2 x i16>
+ ret <2 x i16> %trunc
+}
+
+define i16 @selective_shift_16.wide(i64 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.wide(
+; CHECK-SAME: i64 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: ret i16 [[SEL_V]]
+;
+ %upper.zext = zext i16 %upper to i64
+ %upper.shl = shl nuw i64 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i64
+ %pack = or disjoint i64 %upper.shl, %lower.zext
+ %mask.bit = and i64 %mask, 16
+ %sel = lshr i64 %pack, %mask.bit
+ %trunc = trunc i64 %sel to i16
+ ret i16 %trunc
+}
+
+; narrow zext type blocks fold
+define i16 @selective_shift_16.narrow(i24 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.narrow(
+; CHECK-SAME: i24 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i24
+; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl i24 [[UPPER_ZEXT]], 16
+; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i24
+; CHECK-NEXT: [[PACK:%.*]] = or disjoint i24 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i24 [[MASK]], 16
+; CHECK-NEXT: [[SEL:%.*]] = lshr i24 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i24 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.zext = zext i16 %upper to i24
+ %upper.shl = shl i24 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i24
+ %pack = or disjoint i24 %upper.shl, %lower.zext
+ %mask.bit = and i24 %mask, 16
+ %sel = lshr i24 %pack, %mask.bit
+ %trunc = trunc i24 %sel to i16
+ ret i16 %trunc
+}
+
+; %lower's upper bits block fold
+define i16 @selective_shift_16_norange(i32 %mask, i32 %upper, i32 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16_norange(
+; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER]], 16
+; CHECK-NEXT: [[PACK:%.*]] = or i32 [[UPPER_SHL]], [[LOWER]]
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.shl = shl nuw i32 %upper, 16
+ %pack = or i32 %upper.shl, %lower
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+define i16 @selective_shift_16.mu.0(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.mu.0(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
+; CHECK-NEXT: call void @clobber.i32(i32 [[UPPER_ZEXT]])
+; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
+; CHECK-NEXT: call void @clobber.i32(i32 [[LOWER_ZEXT]])
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[TRUNC:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.zext = zext i16 %upper to i32
+ call void @clobber.i32(i32 %upper.zext)
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ call void @clobber.i32(i32 %lower.zext)
+ %pack = or disjoint i32 %upper.shl, %lower.zext
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+; multi-use of %pack blocks fold
+define i16 @selective_shift_16.mu.1(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.mu.1(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
+; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16
+; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
+; CHECK-NEXT: [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT: call void @clobber.i32(i32 [[PACK]])
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %upper.shl, %lower.zext
+ call void @clobber.i32(i32 %pack)
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+; non-truncated use of %sel blocks fold
+define i16 @selective_shift_16.mu.2(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.mu.2(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
+; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16
+; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
+; CHECK-NEXT: [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT: call void @clobber.i32(i32 [[SEL]])
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %upper.shl, %lower.zext
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ call void @clobber.i32(i32 %sel)
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+; bitwidth must be a power of 2 to fold
+define i24 @selective_shift_24(i48 %mask, i24 %upper, i24 %lower) {
+; CHECK-LABEL: define i24 @selective_shift_24(
+; CHECK-SAME: i48 [[MASK:%.*]], i24 [[UPPER:%.*]], i24 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i24 [[UPPER]] to i48
+; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i48 [[UPPER_ZEXT]], 24
+; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i24 [[LOWER]] to i48
+; CHECK-NEXT: [[PACK:%.*]] = or disjoint i48 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i48 [[MASK]], 24
+; CHECK-NEXT: [[SEL:%.*]] = lshr i48 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i48 [[SEL]] to i24
+; CHECK-NEXT: ret i24 [[TRUNC]]
+;
+ %upper.zext = zext i24 %upper to i48
+ %upper.shl = shl nuw i48 %upper.zext, 24
+ %lower.zext = zext i24 %lower to i48
+ %pack = or disjoint i48 %upper.shl, %lower.zext
+ %mask.bit = and i48 %mask, 24
+ %sel = lshr i48 %pack, %mask.bit
+ %trunc = trunc i48 %sel to i24
+ ret i24 %trunc
+}
+
+define i32 @selective_shift_32(i64 %mask, i32 %upper, i32 %lower) {
+; CHECK-LABEL: define i32 @selective_shift_32(
+; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 32
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT: ret i32 [[SEL_V]]
+;
+ %upper.zext = zext i32 %upper to i64
+ %upper.shl = shl nuw i64 %upper.zext, 32
+ %lower.zext = zext i32 %lower to i64
+ %pack = or disjoint i64 %upper.shl, %lower.zext
+ %mask.bit = and i64 %mask, 32
+ %sel = lshr i64 %pack, %mask.bit
+ %trunc = trunc i64 %sel to i32
+ ret i32 %trunc
+}
+
+define i32 @selective_shift_32.commute(i64 %mask, i32 %upper, i32 %lower) {
+; CHECK-LABEL: define i32 @selective_shift_32.commute(
+; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 32
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT: ret i32 [[SEL_V]]
+;
+ %upper.zext = zext i32 %upper to i64
+ %upper.shl = shl nuw i64 %upper.zext, 32
+ %lower.zext = zext i32 %lower to i64
+ %pack = or disjoint i64 %lower.zext, %upper.shl
+ %mask.bit = and i64 %mask, 32
+ %sel = lshr i64 %pack, %mask.bit
+ %trunc = trunc i64 %sel to i32
+ ret i32 %trunc
+}
diff --git a/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll b/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll
deleted file mode 100644
index 72bc543..0000000
--- a/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll
+++ /dev/null
@@ -1,164 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S < %s -passes='loop(loop-rotate)' -loop-rotate-multi=true | FileCheck %s
-
-; Test loop rotation with multiple exits, some of them - deoptimizing.
-; We should end up with a latch which exit is non-deoptimizing, so we should rotate
-; more than once.
-
-declare i32 @llvm.experimental.deoptimize.i32(...)
-
-define i32 @test_cond_with_one_deopt_exit(ptr nonnull %a, i64 %x) {
-; Rotation done twice.
-; Latch should be at the 2nd condition (for.cond2), exiting to %return.
-;
-; CHECK-LABEL: @test_cond_with_one_deopt_exit(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[VAL_A_IDX3:%.*]] = load i32, ptr %a, align 4
-; CHECK-NEXT: [[ZERO_CHECK4:%.*]] = icmp eq i32 [[VAL_A_IDX3]], 0
-; CHECK-NEXT: br i1 [[ZERO_CHECK4]], label %deopt.exit, label %for.cond2.lr.ph
-; CHECK: for.cond2.lr.ph:
-; CHECK-NEXT: [[FOR_CHECK8:%.*]] = icmp ult i64 0, %x
-; CHECK-NEXT: br i1 [[FOR_CHECK8]], label %for.body.lr.ph, label %return
-; CHECK: for.body.lr.ph:
-; CHECK-NEXT: br label %for.body
-; CHECK: for.cond2:
-; CHECK: [[FOR_CHECK:%.*]] = icmp ult i64 {{%.*}}, %x
-; CHECK-NEXT: br i1 [[FOR_CHECK]], label %for.body, label %for.cond2.return_crit_edge
-; CHECK: for.body:
-; CHECK: br label %for.tail
-; CHECK: for.tail:
-; CHECK: [[VAL_A_IDX:%.*]] = load i32, ptr
-; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[VAL_A_IDX]], 0
-; CHECK-NEXT: br i1 [[ZERO_CHECK]], label %for.cond1.deopt.exit_crit_edge, label %for.cond2
-; CHECK: for.cond2.return_crit_edge:
-; CHECK-NEXT: {{%.*}} = phi i32
-; CHECK-NEXT: br label %return
-; CHECK: return:
-; CHECK-NEXT: [[SUM_LCSSA2:%.*]] = phi i32
-; CHECK-NEXT: ret i32 [[SUM_LCSSA2]]
-; CHECK: for.cond1.deopt.exit_crit_edge:
-; CHECK-NEXT: {{%.*}} = phi i32
-; CHECK-NEXT: br label %deopt.exit
-; CHECK: deopt.exit:
-; CHECK: [[DEOPT_VAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 {{%.*}}) ]
-; CHECK-NEXT: ret i32 [[DEOPT_VAL]]
-;
-entry:
- br label %for.cond1
-
-for.cond1:
- %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.tail ]
- %sum = phi i32 [ 0, %entry ], [ %sum.next, %for.tail ]
- %a.idx = getelementptr inbounds i32, ptr %a, i64 %idx
- %val.a.idx = load i32, ptr %a.idx, align 4
- %zero.check = icmp eq i32 %val.a.idx, 0
- br i1 %zero.check, label %deopt.exit, label %for.cond2
-
-for.cond2:
- %for.check = icmp ult i64 %idx, %x
- br i1 %for.check, label %for.body, label %return
-
-for.body:
- br label %for.tail
-
-for.tail:
- %sum.next = add i32 %sum, %val.a.idx
- %idx.next = add nuw nsw i64 %idx, 1
- br label %for.cond1
-
-return:
- ret i32 %sum
-
-deopt.exit:
- %deopt.val = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %val.a.idx) ]
- ret i32 %deopt.val
-}
-
-define i32 @test_cond_with_two_deopt_exits(ptr nonnull %a, i64 %x) {
-; Rotation done three times.
-; Latch should be at the 3rd condition (for.cond3), exiting to %return.
-;
-; CHECK-LABEL: @test_cond_with_two_deopt_exits(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A_IDX_DEREF4:%.*]] = load ptr, ptr %a
-; CHECK-NEXT: [[NULL_CHECK5:%.*]] = icmp eq ptr [[A_IDX_DEREF4]], null
-; CHECK-NEXT: br i1 [[NULL_CHECK5]], label %deopt.exit1, label %for.cond2.lr.ph
-; CHECK: for.cond2.lr.ph:
-; CHECK-NEXT: [[VAL_A_IDX9:%.*]] = load i32, ptr [[A_IDX_DEREF4]], align 4
-; CHECK-NEXT: [[ZERO_CHECK10:%.*]] = icmp eq i32 [[VAL_A_IDX9]], 0
-; CHECK-NEXT: br i1 [[ZERO_CHECK10]], label %deopt.exit2, label %for.cond3.lr.ph
-; CHECK: for.cond3.lr.ph:
-; CHECK-NEXT: [[FOR_CHECK14:%.*]] = icmp ult i64 0, %x
-; CHECK-NEXT: br i1 [[FOR_CHECK14]], label %for.body.lr.ph, label %return
-; CHECK: for.body.lr.ph:
-; CHECK-NEXT: br label %for.body
-; CHECK: for.cond2:
-; CHECK: [[VAL_A_IDX:%.*]] = load i32, ptr
-; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[VAL_A_IDX]], 0
-; CHECK-NEXT: br i1 [[ZERO_CHECK]], label %for.cond2.deopt.exit2_crit_edge, label %for.cond3
-; CHECK: for.cond3:
-; CHECK: [[FOR_CHECK:%.*]] = icmp ult i64 {{%.*}}, %x
-; CHECK-NEXT: br i1 [[FOR_CHECK]], label %for.body, label %for.cond3.return_crit_edge
-; CHECK: for.body:
-; CHECK: br label %for.tail
-; CHECK: for.tail:
-; CHECK: [[IDX_NEXT:%.*]] = add nuw nsw i64 {{%.*}}, 1
-; CHECK: [[NULL_CHECK:%.*]] = icmp eq ptr {{%.*}}, null
-; CHECK-NEXT: br i1 [[NULL_CHECK]], label %for.cond1.deopt.exit1_crit_edge, label %for.cond2
-; CHECK: for.cond3.return_crit_edge:
-; CHECK-NEXT: [[SPLIT18:%.*]] = phi i32
-; CHECK-NEXT: br label %return
-; CHECK: return:
-; CHECK-NEXT: [[SUM_LCSSA2:%.*]] = phi i32
-; CHECK-NEXT: ret i32 [[SUM_LCSSA2]]
-; CHECK: for.cond1.deopt.exit1_crit_edge:
-; CHECK-NEXT: br label %deopt.exit1
-; CHECK: deopt.exit1:
-; CHECK-NEXT: [[DEOPT_VAL1:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 0) ]
-; CHECK-NEXT: ret i32 [[DEOPT_VAL1]]
-; CHECK: for.cond2.deopt.exit2_crit_edge:
-; CHECK-NEXT: [[SPLIT:%.*]] = phi i32
-; CHECK-NEXT: br label %deopt.exit2
-; CHECK: deopt.exit2:
-; CHECK-NEXT: [[VAL_A_IDX_LCSSA:%.*]] = phi i32
-; CHECK-NEXT: [[DEOPT_VAL2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[VAL_A_IDX_LCSSA]]) ]
-; CHECK-NEXT: ret i32 [[DEOPT_VAL2]]
-;
-entry:
- br label %for.cond1
-
-for.cond1:
- %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.tail ]
- %sum = phi i32 [ 0, %entry ], [ %sum.next, %for.tail ]
- %a.idx = getelementptr inbounds ptr, ptr %a, i64 %idx
- %a.idx.deref = load ptr, ptr %a.idx
- %null.check = icmp eq ptr %a.idx.deref, null
- br i1 %null.check, label %deopt.exit1, label %for.cond2
-
-for.cond2:
- %val.a.idx = load i32, ptr %a.idx.deref, align 4
- %zero.check = icmp eq i32 %val.a.idx, 0
- br i1 %zero.check, label %deopt.exit2, label %for.cond3
-
-for.cond3:
- %for.check = icmp ult i64 %idx, %x
- br i1 %for.check, label %for.body, label %return
-
-for.body:
- br label %for.tail
-
-for.tail:
- %sum.next = add i32 %sum, %val.a.idx
- %idx.next = add nuw nsw i64 %idx, 1
- br label %for.cond1
-
-return:
- ret i32 %sum
-
-deopt.exit1:
- %deopt.val1 = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 0) ]
- ret i32 %deopt.val1
-deopt.exit2:
- %deopt.val2 = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %val.a.idx) ]
- ret i32 %deopt.val2
-}
diff --git a/llvm/test/Transforms/LoopRotate/multiple-exits.ll b/llvm/test/Transforms/LoopRotate/multiple-exits.ll
deleted file mode 100644
index 748700c..0000000
--- a/llvm/test/Transforms/LoopRotate/multiple-exits.ll
+++ /dev/null
@@ -1,236 +0,0 @@
-; RUN: opt -S -passes=loop-rotate < %s -verify-loop-info -verify-dom-info -verify-memoryssa | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.8.0"
-
-; PR7447
-define i32 @test1(ptr nocapture %a) nounwind readonly {
-entry:
- br label %for.cond
-
-for.cond: ; preds = %for.cond1, %entry
- %sum.0 = phi i32 [ 0, %entry ], [ %sum.1, %for.cond1 ]
- %i.0 = phi i1 [ true, %entry ], [ false, %for.cond1 ]
- br i1 %i.0, label %for.cond1, label %return
-
-for.cond1: ; preds = %for.cond, %land.rhs
- %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.0, %for.cond ]
- %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond ]
- %cmp2 = icmp ult i32 %i.1, 100
- br i1 %cmp2, label %land.rhs, label %for.cond
-
-land.rhs: ; preds = %for.cond1
- %conv = zext i32 %i.1 to i64
- %arrayidx = getelementptr inbounds [100 x i32], ptr %a, i64 0, i64 %conv
- %0 = load i32, ptr %arrayidx, align 4
- %add = add i32 %0, %sum.1
- %cmp4 = icmp ugt i32 %add, 1000
- %inc = add i32 %i.1, 1
- br i1 %cmp4, label %return, label %for.cond1
-
-return: ; preds = %for.cond, %land.rhs
- %retval.0 = phi i32 [ 1000, %land.rhs ], [ %sum.0, %for.cond ]
- ret i32 %retval.0
-
-; CHECK-LABEL: @test1(
-; CHECK: for.cond1.preheader:
-; CHECK: %sum.04 = phi i32 [ 0, %entry ], [ %sum.1.lcssa, %for.cond.loopexit ]
-; CHECK: br label %for.cond1
-
-; CHECK: for.cond1:
-; CHECK: %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.04, %for.cond1.preheader ]
-; CHECK: %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond1.preheader ]
-; CHECK: %cmp2 = icmp ult i32 %i.1, 100
-; CHECK: br i1 %cmp2, label %land.rhs, label %for.cond.loopexit
-}
-
-define void @test2(i32 %x) nounwind {
-entry:
- br label %for.cond
-
-for.cond: ; preds = %if.end, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ]
- %cmp = icmp eq i32 %i.0, %x
- br i1 %cmp, label %return.loopexit, label %for.body
-
-for.body: ; preds = %for.cond
- %call = tail call i32 @foo(i32 %i.0) nounwind
- %tobool = icmp eq i32 %call, 0
- br i1 %tobool, label %if.end, label %a
-
-if.end: ; preds = %for.body
- %call1 = tail call i32 @foo(i32 42) nounwind
- %inc = add i32 %i.0, 1
- br label %for.cond
-
-a: ; preds = %for.body
- %call2 = tail call i32 @bar(i32 1) nounwind
- br label %return
-
-return.loopexit: ; preds = %for.cond
- br label %return
-
-return: ; preds = %return.loopexit, %a
- ret void
-
-; CHECK-LABEL: @test2(
-; CHECK: if.end:
-; CHECK: %inc = add i32 %i.02, 1
-; CHECK: %cmp = icmp eq i32 %inc, %x
-; CHECK: br i1 %cmp, label %for.cond.return.loopexit_crit_edge, label %for.body
-}
-
-declare i32 @foo(i32)
-
-declare i32 @bar(i32)
-
-@_ZTIi = external constant ptr
-
-; Verify dominators.
-define void @test3(i32 %x) personality ptr @__gxx_personality_v0 {
-entry:
- %cmp2 = icmp eq i32 0, %x
- br i1 %cmp2, label %try.cont.loopexit, label %for.body.lr.ph
-
-for.body.lr.ph: ; preds = %entry
- br label %for.body
-
-for.body: ; preds = %for.body.lr.ph, %for.inc
- %i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
- invoke void @_Z3fooi(i32 %i.03)
- to label %for.inc unwind label %lpad
-
-for.inc: ; preds = %for.body
- %inc = add i32 %i.03, 1
- %cmp = icmp eq i32 %inc, %x
- br i1 %cmp, label %for.cond.try.cont.loopexit_crit_edge, label %for.body
-
-lpad: ; preds = %for.body
- %0 = landingpad { ptr, i32 }
- catch ptr @_ZTIi
- %1 = extractvalue { ptr, i32 } %0, 0
- %2 = extractvalue { ptr, i32 } %0, 1
- %3 = tail call i32 @llvm.eh.typeid.for(ptr @_ZTIi) nounwind
- %matches = icmp eq i32 %2, %3
- br i1 %matches, label %catch, label %eh.resume
-
-catch: ; preds = %lpad
- %4 = tail call ptr @__cxa_begin_catch(ptr %1) nounwind
- br i1 true, label %invoke.cont2.loopexit, label %for.body.i.lr.ph
-
-for.body.i.lr.ph: ; preds = %catch
- br label %for.body.i
-
-for.body.i: ; preds = %for.body.i.lr.ph, %for.inc.i
- %i.0.i1 = phi i32 [ 0, %for.body.i.lr.ph ], [ %inc.i, %for.inc.i ]
- invoke void @_Z3fooi(i32 %i.0.i1)
- to label %for.inc.i unwind label %lpad.i
-
-for.inc.i: ; preds = %for.body.i
- %inc.i = add i32 %i.0.i1, 1
- %cmp.i = icmp eq i32 %inc.i, 0
- br i1 %cmp.i, label %for.cond.i.invoke.cont2.loopexit_crit_edge, label %for.body.i
-
-lpad.i: ; preds = %for.body.i
- %5 = landingpad { ptr, i32 }
- catch ptr @_ZTIi
- %6 = extractvalue { ptr, i32 } %5, 0
- %7 = extractvalue { ptr, i32 } %5, 1
- %matches.i = icmp eq i32 %7, %3
- br i1 %matches.i, label %catch.i, label %lpad1.body
-
-catch.i: ; preds = %lpad.i
- %8 = tail call ptr @__cxa_begin_catch(ptr %6) nounwind
- invoke void @test3(i32 0)
- to label %invoke.cont2.i unwind label %lpad1.i
-
-invoke.cont2.i: ; preds = %catch.i
- tail call void @__cxa_end_catch() nounwind
- br label %invoke.cont2
-
-lpad1.i: ; preds = %catch.i
- %9 = landingpad { ptr, i32 }
- cleanup
- %10 = extractvalue { ptr, i32 } %9, 0
- %11 = extractvalue { ptr, i32 } %9, 1
- tail call void @__cxa_end_catch() nounwind
- br label %lpad1.body
-
-for.cond.i.invoke.cont2.loopexit_crit_edge: ; preds = %for.inc.i
- br label %invoke.cont2.loopexit
-
-invoke.cont2.loopexit: ; preds = %for.cond.i.invoke.cont2.loopexit_crit_edge, %catch
- br label %invoke.cont2
-
-invoke.cont2: ; preds = %invoke.cont2.loopexit, %invoke.cont2.i
- tail call void @__cxa_end_catch() nounwind
- br label %try.cont
-
-for.cond.try.cont.loopexit_crit_edge: ; preds = %for.inc
- br label %try.cont.loopexit
-
-try.cont.loopexit: ; preds = %for.cond.try.cont.loopexit_crit_edge, %entry
- br label %try.cont
-
-try.cont: ; preds = %try.cont.loopexit, %invoke.cont2
- ret void
-
-lpad1.body: ; preds = %lpad1.i, %lpad.i
- %exn.slot.0.i = phi ptr [ %10, %lpad1.i ], [ %6, %lpad.i ]
- %ehselector.slot.0.i = phi i32 [ %11, %lpad1.i ], [ %7, %lpad.i ]
- tail call void @__cxa_end_catch() nounwind
- br label %eh.resume
-
-eh.resume: ; preds = %lpad1.body, %lpad
- %exn.slot.0 = phi ptr [ %exn.slot.0.i, %lpad1.body ], [ %1, %lpad ]
- %ehselector.slot.0 = phi i32 [ %ehselector.slot.0.i, %lpad1.body ], [ %2, %lpad ]
- %lpad.val = insertvalue { ptr, i32 } undef, ptr %exn.slot.0, 0
- %lpad.val5 = insertvalue { ptr, i32 } %lpad.val, i32 %ehselector.slot.0, 1
- resume { ptr, i32 } %lpad.val5
-}
-
-declare void @_Z3fooi(i32)
-
-declare i32 @__gxx_personality_v0(...)
-
-declare i32 @llvm.eh.typeid.for(ptr) nounwind readnone
-
-declare ptr @__cxa_begin_catch(ptr)
-
-declare void @__cxa_end_catch()
-
-define void @test4(i1 %arg) nounwind uwtable {
-entry:
- br label %"7"
-
-"3": ; preds = %"7"
- br i1 %arg, label %"31", label %"4"
-
-"4": ; preds = %"3"
- %. = select i1 undef, float 0x3F50624DE0000000, float undef
- %0 = add i32 %1, 1
- br label %"7"
-
-"7": ; preds = %"4", %entry
- %1 = phi i32 [ %0, %"4" ], [ 0, %entry ]
- %2 = icmp slt i32 %1, 100
- br i1 %2, label %"3", label %"8"
-
-"8": ; preds = %"7"
- br i1 %arg, label %"9", label %"31"
-
-"9": ; preds = %"8"
- br label %"33"
-
-"27": ; preds = %"31"
- unreachable
-
-"31": ; preds = %"8", %"3"
- br i1 %arg, label %"27", label %"32"
-
-"32": ; preds = %"31"
- br label %"33"
-
-"33": ; preds = %"32", %"9"
- ret void
-}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll
index 649e34e..7548bf6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll
@@ -45,9 +45,6 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly
; CHECK-UF4-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 8)
; CHECK-UF4-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 4)
; CHECK-UF4-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0)
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 4, i64 [[N]])
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY2:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 8, i64 [[N]])
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 12, i64 [[N]])
; CHECK-UF4-NEXT: br label [[VECTOR_BODY1:%.*]]
; CHECK-UF4: vector.body:
; CHECK-UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY1]] ]
@@ -67,17 +64,11 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly
; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP18]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK5]])
; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP19]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK6]])
; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
-; CHECK-UF4-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 4
-; CHECK-UF4-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 8
-; CHECK-UF4-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 12
; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 [[INDEX]], i64 [[TMP6]])
; CHECK-UF4-NEXT: [[TMP12]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 12)
; CHECK-UF4-NEXT: [[TMP11]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 8)
; CHECK-UF4-NEXT: [[TMP10]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 4)
; CHECK-UF4-NEXT: [[TMP9]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0)
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT7:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP13]], i64 [[TMP6]])
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT8:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP14]], i64 [[TMP6]])
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT9:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP15]], i64 [[TMP6]])
; CHECK-UF4-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP9]], i32 0
; CHECK-UF4-NEXT: [[TMP20:%.*]] = xor i1 [[TMP21]], true
; CHECK-UF4-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll
index 5ee4e9e..75acbea9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll
@@ -46,23 +46,11 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
; CHECK-UF4-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP3]]
; CHECK-UF4-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP3]]
; CHECK-UF4-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
-; CHECK-UF4-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 4
-; CHECK-UF4-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP11]]
-; CHECK-UF4-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 5
-; CHECK-UF4-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP13]]
-; CHECK-UF4-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 48
-; CHECK-UF4-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP15]]
; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 0, i64 [[N]])
; CHECK-UF4-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 48)
; CHECK-UF4-NEXT: [[TMP18:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 32)
; CHECK-UF4-NEXT: [[TMP17:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 16)
; CHECK-UF4-NEXT: [[TMP16:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0)
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]])
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]])
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]])
; CHECK-UF4-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-UF4: vector.body:
; CHECK-UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -103,23 +91,11 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP27]], ptr [[TMP42]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK7]])
; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP28]], ptr [[TMP45]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK8]])
; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP62]]
-; CHECK-UF4-NEXT: [[TMP46:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP47:%.*]] = shl nuw i64 [[TMP46]], 4
-; CHECK-UF4-NEXT: [[TMP48:%.*]] = add i64 [[INDEX]], [[TMP47]]
-; CHECK-UF4-NEXT: [[TMP49:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP50:%.*]] = shl nuw i64 [[TMP49]], 5
-; CHECK-UF4-NEXT: [[TMP51:%.*]] = add i64 [[INDEX]], [[TMP50]]
-; CHECK-UF4-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP53:%.*]] = mul nuw i64 [[TMP52]], 48
-; CHECK-UF4-NEXT: [[TMP54:%.*]] = add i64 [[INDEX]], [[TMP53]]
; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 [[INDEX]], i64 [[TMP9]])
; CHECK-UF4-NEXT: [[TMP58]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 48)
; CHECK-UF4-NEXT: [[TMP57]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 32)
; CHECK-UF4-NEXT: [[TMP56]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 16)
; CHECK-UF4-NEXT: [[TMP55]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0)
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT12:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP48]], i64 [[TMP9]])
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT13:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP51]], i64 [[TMP9]])
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT14:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP54]], i64 [[TMP9]])
; CHECK-UF4-NEXT: [[TMP59:%.*]] = extractelement <vscale x 16 x i1> [[TMP55]], i32 0
; CHECK-UF4-NEXT: [[TMP60:%.*]] = xor i1 [[TMP59]], true
; CHECK-UF4-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -191,23 +167,11 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
; CHECK-UF4-NEXT: [[TMP31:%.*]] = sub i64 [[N]], [[TMP26]]
; CHECK-UF4-NEXT: [[TMP56:%.*]] = icmp ugt i64 [[N]], [[TMP26]]
; CHECK-UF4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = select i1 [[TMP56]], i64 [[TMP31]], i64 0
-; CHECK-UF4-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
-; CHECK-UF4-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP6]]
-; CHECK-UF4-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
-; CHECK-UF4-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP8]]
-; CHECK-UF4-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 6
-; CHECK-UF4-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP10]]
; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]])
; CHECK-UF4-NEXT: [[TMP14:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 6)
; CHECK-UF4-NEXT: [[TMP13:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 4)
; CHECK-UF4-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 2)
; CHECK-UF4-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0)
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]])
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]])
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]])
; CHECK-UF4-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-UF4: vector.body:
; CHECK-UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -248,23 +212,11 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP18]], ptr [[TMP37]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK7]])
; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP19]], ptr [[TMP40]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK8]])
; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP3]]
-; CHECK-UF4-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP42:%.*]] = shl nuw i64 [[TMP41]], 1
-; CHECK-UF4-NEXT: [[TMP43:%.*]] = add i64 [[INDEX]], [[TMP42]]
-; CHECK-UF4-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP45:%.*]] = shl nuw i64 [[TMP44]], 2
-; CHECK-UF4-NEXT: [[TMP46:%.*]] = add i64 [[INDEX]], [[TMP45]]
-; CHECK-UF4-NEXT: [[TMP47:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT: [[TMP48:%.*]] = mul nuw i64 [[TMP47]], 6
-; CHECK-UF4-NEXT: [[TMP49:%.*]] = add i64 [[INDEX]], [[TMP48]]
; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[WIDE_TRIP_COUNT]])
; CHECK-UF4-NEXT: [[TMP53]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 6)
; CHECK-UF4-NEXT: [[TMP52]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 4)
; CHECK-UF4-NEXT: [[TMP51]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 2)
; CHECK-UF4-NEXT: [[TMP50]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0)
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT12:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP43]], i64 [[WIDE_TRIP_COUNT]])
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT13:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP46]], i64 [[WIDE_TRIP_COUNT]])
-; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT14:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP49]], i64 [[WIDE_TRIP_COUNT]])
; CHECK-UF4-NEXT: [[TMP54:%.*]] = extractelement <vscale x 2 x i1> [[TMP50]], i32 0
; CHECK-UF4-NEXT: [[TMP55:%.*]] = xor i1 [[TMP54]], true
; CHECK-UF4-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
index f5329cf..c225ede5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
@@ -580,6 +580,201 @@ exit:
ret double %accum
}
+define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %src, ptr noalias %src.2, ptr noalias %dst) #0 {
+; I64-LABEL: define void @loaded_address_used_by_load_through_blend(
+; I64-SAME: i64 [[START:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[SRC_2:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0]] {
+; I64-NEXT: [[ENTRY:.*]]:
+; I64-NEXT: br label %[[LOOP_HEADER:.*]]
+; I64: [[LOOP_HEADER]]:
+; I64-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; I64-NEXT: [[IV_2:%.*]] = phi i64 [ [[START]], %[[ENTRY]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP_LATCH]] ]
+; I64-NEXT: [[IV_1:%.*]] = add i64 [[IV]], 1
+; I64-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_1]]
+; I64-NEXT: [[L_SRC:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; I64-NEXT: [[C:%.*]] = fcmp oeq float [[L_SRC]], 0.000000e+00
+; I64-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
+; I64: [[THEN]]:
+; I64-NEXT: [[IV_MUL:%.*]] = mul i64 [[IV_1]], [[START]]
+; I64-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[IV_MUL]]
+; I64-NEXT: br label %[[LOOP_LATCH]]
+; I64: [[LOOP_LATCH]]:
+; I64-NEXT: [[MERGE_GEP:%.*]] = phi ptr [ [[GEP_SRC_2]], %[[THEN]] ], [ [[SRC_2]], %[[LOOP_HEADER]] ]
+; I64-NEXT: [[L_2:%.*]] = load float, ptr [[MERGE_GEP]], align 4
+; I64-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]]
+; I64-NEXT: store float [[L_2]], ptr [[GEP_DST]], align 4
+; I64-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; I64-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], -1
+; I64-NEXT: [[EC:%.*]] = icmp sgt i64 [[IV_2]], 100
+; I64-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT:.*]]
+; I64: [[EXIT]]:
+; I64-NEXT: ret void
+;
+; I32-LABEL: define void @loaded_address_used_by_load_through_blend(
+; I32-SAME: i64 [[START:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[SRC_2:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0]] {
+; I32-NEXT: [[ENTRY:.*:]]
+; I32-NEXT: [[TMP0:%.*]] = add i64 [[START]], 1
+; I32-NEXT: [[SMIN:%.*]] = call i64 @llvm.smin.i64(i64 [[START]], i64 100)
+; I32-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[SMIN]]
+; I32-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 8
+; I32-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; I32: [[VECTOR_PH]]:
+; I32-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 8
+; I32-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
+; I32-NEXT: [[TMP2:%.*]] = sub i64 [[START]], [[N_VEC]]
+; I32-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[START]], i64 0
+; I32-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+; I32-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x ptr> poison, ptr [[SRC_2]], i64 0
+; I32-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x ptr> [[BROADCAST_SPLATINSERT1]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; I32-NEXT: br label %[[VECTOR_BODY:.*]]
+; I32: [[VECTOR_BODY]]:
+; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; I32-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
+; I32-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2
+; I32-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3
+; I32-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 4
+; I32-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 5
+; I32-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 6
+; I32-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 7
+; I32-NEXT: [[TMP11:%.*]] = add i64 [[TMP3]], 1
+; I32-NEXT: [[TMP12:%.*]] = add i64 [[TMP4]], 1
+; I32-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], 1
+; I32-NEXT: [[TMP14:%.*]] = add i64 [[TMP6]], 1
+; I32-NEXT: [[TMP15:%.*]] = add i64 [[TMP7]], 1
+; I32-NEXT: [[TMP16:%.*]] = add i64 [[TMP8]], 1
+; I32-NEXT: [[TMP17:%.*]] = add i64 [[TMP9]], 1
+; I32-NEXT: [[TMP18:%.*]] = add i64 [[TMP10]], 1
+; I32-NEXT: [[TMP19:%.*]] = insertelement <8 x i64> poison, i64 [[TMP11]], i32 0
+; I32-NEXT: [[TMP20:%.*]] = insertelement <8 x i64> [[TMP19]], i64 [[TMP12]], i32 1
+; I32-NEXT: [[TMP21:%.*]] = insertelement <8 x i64> [[TMP20]], i64 [[TMP13]], i32 2
+; I32-NEXT: [[TMP22:%.*]] = insertelement <8 x i64> [[TMP21]], i64 [[TMP14]], i32 3
+; I32-NEXT: [[TMP23:%.*]] = insertelement <8 x i64> [[TMP22]], i64 [[TMP15]], i32 4
+; I32-NEXT: [[TMP24:%.*]] = insertelement <8 x i64> [[TMP23]], i64 [[TMP16]], i32 5
+; I32-NEXT: [[TMP25:%.*]] = insertelement <8 x i64> [[TMP24]], i64 [[TMP17]], i32 6
+; I32-NEXT: [[TMP26:%.*]] = insertelement <8 x i64> [[TMP25]], i64 [[TMP18]], i32 7
+; I32-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP11]]
+; I32-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP12]]
+; I32-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]]
+; I32-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP14]]
+; I32-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP15]]
+; I32-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP16]]
+; I32-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP17]]
+; I32-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP18]]
+; I32-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP27]], align 4
+; I32-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP28]], align 4
+; I32-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP29]], align 4
+; I32-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP30]], align 4
+; I32-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP31]], align 4
+; I32-NEXT: [[TMP40:%.*]] = load float, ptr [[TMP32]], align 4
+; I32-NEXT: [[TMP41:%.*]] = load float, ptr [[TMP33]], align 4
+; I32-NEXT: [[TMP42:%.*]] = load float, ptr [[TMP34]], align 4
+; I32-NEXT: [[TMP43:%.*]] = insertelement <8 x float> poison, float [[TMP35]], i32 0
+; I32-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP36]], i32 1
+; I32-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP37]], i32 2
+; I32-NEXT: [[TMP46:%.*]] = insertelement <8 x float> [[TMP45]], float [[TMP38]], i32 3
+; I32-NEXT: [[TMP47:%.*]] = insertelement <8 x float> [[TMP46]], float [[TMP39]], i32 4
+; I32-NEXT: [[TMP48:%.*]] = insertelement <8 x float> [[TMP47]], float [[TMP40]], i32 5
+; I32-NEXT: [[TMP49:%.*]] = insertelement <8 x float> [[TMP48]], float [[TMP41]], i32 6
+; I32-NEXT: [[TMP50:%.*]] = insertelement <8 x float> [[TMP49]], float [[TMP42]], i32 7
+; I32-NEXT: [[TMP51:%.*]] = fcmp oeq <8 x float> [[TMP50]], zeroinitializer
+; I32-NEXT: [[TMP52:%.*]] = mul <8 x i64> [[TMP26]], [[BROADCAST_SPLAT]]
+; I32-NEXT: [[TMP53:%.*]] = extractelement <8 x i64> [[TMP52]], i32 0
+; I32-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP53]]
+; I32-NEXT: [[TMP55:%.*]] = extractelement <8 x i64> [[TMP52]], i32 1
+; I32-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP55]]
+; I32-NEXT: [[TMP57:%.*]] = extractelement <8 x i64> [[TMP52]], i32 2
+; I32-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP57]]
+; I32-NEXT: [[TMP59:%.*]] = extractelement <8 x i64> [[TMP52]], i32 3
+; I32-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP59]]
+; I32-NEXT: [[TMP61:%.*]] = extractelement <8 x i64> [[TMP52]], i32 4
+; I32-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP61]]
+; I32-NEXT: [[TMP63:%.*]] = extractelement <8 x i64> [[TMP52]], i32 5
+; I32-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP63]]
+; I32-NEXT: [[TMP65:%.*]] = extractelement <8 x i64> [[TMP52]], i32 6
+; I32-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP65]]
+; I32-NEXT: [[TMP67:%.*]] = extractelement <8 x i64> [[TMP52]], i32 7
+; I32-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP67]]
+; I32-NEXT: [[TMP69:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP54]], i32 0
+; I32-NEXT: [[TMP70:%.*]] = insertelement <8 x ptr> [[TMP69]], ptr [[TMP56]], i32 1
+; I32-NEXT: [[TMP71:%.*]] = insertelement <8 x ptr> [[TMP70]], ptr [[TMP58]], i32 2
+; I32-NEXT: [[TMP72:%.*]] = insertelement <8 x ptr> [[TMP71]], ptr [[TMP60]], i32 3
+; I32-NEXT: [[TMP73:%.*]] = insertelement <8 x ptr> [[TMP72]], ptr [[TMP62]], i32 4
+; I32-NEXT: [[TMP74:%.*]] = insertelement <8 x ptr> [[TMP73]], ptr [[TMP64]], i32 5
+; I32-NEXT: [[TMP75:%.*]] = insertelement <8 x ptr> [[TMP74]], ptr [[TMP66]], i32 6
+; I32-NEXT: [[TMP76:%.*]] = insertelement <8 x ptr> [[TMP75]], ptr [[TMP68]], i32 7
+; I32-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[TMP51]], <8 x ptr> [[TMP76]], <8 x ptr> [[BROADCAST_SPLAT2]]
+; I32-NEXT: [[TMP77:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 0
+; I32-NEXT: [[TMP78:%.*]] = load float, ptr [[TMP77]], align 4
+; I32-NEXT: [[TMP79:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 1
+; I32-NEXT: [[TMP80:%.*]] = load float, ptr [[TMP79]], align 4
+; I32-NEXT: [[TMP81:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 2
+; I32-NEXT: [[TMP82:%.*]] = load float, ptr [[TMP81]], align 4
+; I32-NEXT: [[TMP83:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 3
+; I32-NEXT: [[TMP84:%.*]] = load float, ptr [[TMP83]], align 4
+; I32-NEXT: [[TMP85:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 4
+; I32-NEXT: [[TMP86:%.*]] = load float, ptr [[TMP85]], align 4
+; I32-NEXT: [[TMP87:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 5
+; I32-NEXT: [[TMP88:%.*]] = load float, ptr [[TMP87]], align 4
+; I32-NEXT: [[TMP89:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 6
+; I32-NEXT: [[TMP90:%.*]] = load float, ptr [[TMP89]], align 4
+; I32-NEXT: [[TMP91:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 7
+; I32-NEXT: [[TMP92:%.*]] = load float, ptr [[TMP91]], align 4
+; I32-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
+; I32-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]]
+; I32-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]]
+; I32-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]]
+; I32-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
+; I32-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
+; I32-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP9]]
+; I32-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]]
+; I32-NEXT: store float [[TMP78]], ptr [[TMP93]], align 4
+; I32-NEXT: store float [[TMP80]], ptr [[TMP94]], align 4
+; I32-NEXT: store float [[TMP82]], ptr [[TMP95]], align 4
+; I32-NEXT: store float [[TMP84]], ptr [[TMP96]], align 4
+; I32-NEXT: store float [[TMP86]], ptr [[TMP97]], align 4
+; I32-NEXT: store float [[TMP88]], ptr [[TMP98]], align 4
+; I32-NEXT: store float [[TMP90]], ptr [[TMP99]], align 4
+; I32-NEXT: store float [[TMP92]], ptr [[TMP100]], align 4
+; I32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; I32-NEXT: [[TMP101:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; I32-NEXT: br i1 [[TMP101]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; I32: [[MIDDLE_BLOCK]]:
+; I32-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
+; I32-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
+; I32: [[SCALAR_PH]]:
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %iv.2 = phi i64 [ %start, %entry ], [ %iv.2.next, %loop.latch ]
+ %iv.1 = add i64 %iv, 1
+ %gep.src = getelementptr i8, ptr %src, i64 %iv.1
+ %l.src = load float, ptr %gep.src, align 4
+ %c = fcmp oeq float %l.src, 0.000000e+00
+ br i1 %c, label %then, label %loop.latch
+
+then:
+ %iv.mul = mul i64 %iv.1, %start
+ %gep.src.2 = getelementptr i8, ptr %src.2, i64 %iv.mul
+ br label %loop.latch
+
+loop.latch:
+ %merge.gep = phi ptr [ %gep.src.2, %then ], [ %src.2, %loop.header ]
+ %l.2 = load float, ptr %merge.gep, align 4
+ %gep.dst = getelementptr i8, ptr %dst, i64 %iv
+ store float %l.2, ptr %gep.dst, align 4
+ %iv.next = add i64 %iv, 1
+ %iv.2.next = add i64 %iv.2, -1
+ %ec = icmp sgt i64 %iv.2, 100
+ br i1 %ec, label %loop.header, label %exit
+
+exit:
+ ret void
+}
+
+attributes #0 = { "target-cpu"="znver3" }
attributes #0 = { "target-cpu"="znver2" }
!0 = distinct !{!0, !1}
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
index af57967..b63ab8f 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
@@ -22,7 +22,6 @@ define void @foo(i32 %val, ptr dereferenceable(1024) %ptr) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 256)
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/NewGVN/ptrtoaddr.ll b/llvm/test/Transforms/NewGVN/ptrtoaddr.ll
new file mode 100644
index 0000000..e51b42a
--- /dev/null
+++ b/llvm/test/Transforms/NewGVN/ptrtoaddr.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=newgvn < %s | FileCheck %s
+
+define i64 @ptrtoaddr_same(ptr %p) {
+; CHECK-LABEL: define i64 @ptrtoaddr_same(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: ret i64 0
+;
+ %i = ptrtoaddr ptr %p to i64
+ %j = ptrtoaddr ptr %p to i64
+ %sub = sub i64 %i, %j
+ ret i64 %sub
+}
+
+; Note that unlike for ptrtoint, it's not possible for ptrtoaddr to differ
+; in result type for the same input.
+define i64 @ptrtoaddr_different(ptr %p, ptr %p2) {
+; CHECK-LABEL: define i64 @ptrtoaddr_different(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = ptrtoaddr ptr [[P]] to i64
+; CHECK-NEXT: [[J:%.*]] = ptrtoaddr ptr [[P2]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[I]], [[J]]
+; CHECK-NEXT: ret i64 [[SUB]]
+;
+ %i = ptrtoaddr ptr %p to i64
+ %j = ptrtoaddr ptr %p2 to i64
+ %sub = sub i64 %i, %j
+ ret i64 %sub
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll b/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll
new file mode 100644
index 0000000..7c9888f
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll
@@ -0,0 +1,448 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -O3 < %s | FileCheck %s
+
+target datalayout = "n64:32:16:8"
+
+define i8 @test(i8 %x) {
+; CHECK-LABEL: define range(i8 0, 53) i8 @test(
+; CHECK-SAME: i8 [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[START:.*:]]
+; CHECK-NEXT: [[X_:%.*]] = tail call i8 @llvm.umin.i8(i8 [[X]], i8 52)
+; CHECK-NEXT: ret i8 [[X_]]
+;
+start:
+ %_0 = alloca [1 x i8], align 1
+ %0 = icmp eq i8 %x, 0
+ br i1 %0, label %bb1, label %bb2
+
+bb1: ; preds = %start
+ store i8 0, ptr %_0, align 1
+ br label %bb105
+
+bb2: ; preds = %start
+ %1 = icmp eq i8 %x, 1
+ br i1 %1, label %bb3, label %bb4
+
+bb105: ; preds = %bb104, %bb103, %bb101, %bb99, %bb97, %bb95, %bb93, %bb91, %bb89, %bb87, %bb85, %bb83, %bb81, %bb79, %bb77, %bb75, %bb73, %bb71, %bb69, %bb67, %bb65, %bb63, %bb61, %bb59, %bb57, %bb55, %bb53, %bb51, %bb49, %bb47, %bb45, %bb43, %bb41, %bb39, %bb37, %bb35, %bb33, %bb31, %bb29, %bb27, %bb25, %bb23, %bb21, %bb19, %bb17, %bb15, %bb13, %bb11, %bb9, %bb7, %bb5, %bb3, %bb1
+ %2 = load i8, ptr %_0, align 1
+ ret i8 %2
+
+bb3: ; preds = %bb2
+ store i8 1, ptr %_0, align 1
+ br label %bb105
+
+bb4: ; preds = %bb2
+ %3 = icmp eq i8 %x, 2
+ br i1 %3, label %bb5, label %bb6
+
+bb5: ; preds = %bb4
+ store i8 2, ptr %_0, align 1
+ br label %bb105
+
+bb6: ; preds = %bb4
+ %4 = icmp eq i8 %x, 3
+ br i1 %4, label %bb7, label %bb8
+
+bb7: ; preds = %bb6
+ store i8 3, ptr %_0, align 1
+ br label %bb105
+
+bb8: ; preds = %bb6
+ %5 = icmp eq i8 %x, 4
+ br i1 %5, label %bb9, label %bb10
+
+bb9: ; preds = %bb8
+ store i8 4, ptr %_0, align 1
+ br label %bb105
+
+bb10: ; preds = %bb8
+ %6 = icmp eq i8 %x, 5
+ br i1 %6, label %bb11, label %bb12
+
+bb11: ; preds = %bb10
+ store i8 5, ptr %_0, align 1
+ br label %bb105
+
+bb12: ; preds = %bb10
+ %7 = icmp eq i8 %x, 6
+ br i1 %7, label %bb13, label %bb14
+
+bb13: ; preds = %bb12
+ store i8 6, ptr %_0, align 1
+ br label %bb105
+
+bb14: ; preds = %bb12
+ %8 = icmp eq i8 %x, 7
+ br i1 %8, label %bb15, label %bb16
+
+bb15: ; preds = %bb14
+ store i8 7, ptr %_0, align 1
+ br label %bb105
+
+bb16: ; preds = %bb14
+ %9 = icmp eq i8 %x, 8
+ br i1 %9, label %bb17, label %bb18
+
+bb17: ; preds = %bb16
+ store i8 8, ptr %_0, align 1
+ br label %bb105
+
+bb18: ; preds = %bb16
+ %10 = icmp eq i8 %x, 9
+ br i1 %10, label %bb19, label %bb20
+
+bb19: ; preds = %bb18
+ store i8 9, ptr %_0, align 1
+ br label %bb105
+
+bb20: ; preds = %bb18
+ %11 = icmp eq i8 %x, 10
+ br i1 %11, label %bb21, label %bb22
+
+bb21: ; preds = %bb20
+ store i8 10, ptr %_0, align 1
+ br label %bb105
+
+bb22: ; preds = %bb20
+ %12 = icmp eq i8 %x, 11
+ br i1 %12, label %bb23, label %bb24
+
+bb23: ; preds = %bb22
+ store i8 11, ptr %_0, align 1
+ br label %bb105
+
+bb24: ; preds = %bb22
+ %13 = icmp eq i8 %x, 12
+ br i1 %13, label %bb25, label %bb26
+
+bb25: ; preds = %bb24
+ store i8 12, ptr %_0, align 1
+ br label %bb105
+
+bb26: ; preds = %bb24
+ %14 = icmp eq i8 %x, 13
+ br i1 %14, label %bb27, label %bb28
+
+bb27: ; preds = %bb26
+ store i8 13, ptr %_0, align 1
+ br label %bb105
+
+bb28: ; preds = %bb26
+ %15 = icmp eq i8 %x, 14
+ br i1 %15, label %bb29, label %bb30
+
+bb29: ; preds = %bb28
+ store i8 14, ptr %_0, align 1
+ br label %bb105
+
+bb30: ; preds = %bb28
+ %16 = icmp eq i8 %x, 15
+ br i1 %16, label %bb31, label %bb32
+
+bb31: ; preds = %bb30
+ store i8 15, ptr %_0, align 1
+ br label %bb105
+
+bb32: ; preds = %bb30
+ %17 = icmp eq i8 %x, 16
+ br i1 %17, label %bb33, label %bb34
+
+bb33: ; preds = %bb32
+ store i8 16, ptr %_0, align 1
+ br label %bb105
+
+bb34: ; preds = %bb32
+ %18 = icmp eq i8 %x, 17
+ br i1 %18, label %bb35, label %bb36
+
+bb35: ; preds = %bb34
+ store i8 17, ptr %_0, align 1
+ br label %bb105
+
+bb36: ; preds = %bb34
+ %19 = icmp eq i8 %x, 18
+ br i1 %19, label %bb37, label %bb38
+
+bb37: ; preds = %bb36
+ store i8 18, ptr %_0, align 1
+ br label %bb105
+
+bb38: ; preds = %bb36
+ %20 = icmp eq i8 %x, 19
+ br i1 %20, label %bb39, label %bb40
+
+bb39: ; preds = %bb38
+ store i8 19, ptr %_0, align 1
+ br label %bb105
+
+bb40: ; preds = %bb38
+ %21 = icmp eq i8 %x, 20
+ br i1 %21, label %bb41, label %bb42
+
+bb41: ; preds = %bb40
+ store i8 20, ptr %_0, align 1
+ br label %bb105
+
+bb42: ; preds = %bb40
+ %22 = icmp eq i8 %x, 21
+ br i1 %22, label %bb43, label %bb44
+
+bb43: ; preds = %bb42
+ store i8 21, ptr %_0, align 1
+ br label %bb105
+
+bb44: ; preds = %bb42
+ %23 = icmp eq i8 %x, 22
+ br i1 %23, label %bb45, label %bb46
+
+bb45: ; preds = %bb44
+ store i8 22, ptr %_0, align 1
+ br label %bb105
+
+bb46: ; preds = %bb44
+ %24 = icmp eq i8 %x, 23
+ br i1 %24, label %bb47, label %bb48
+
+bb47: ; preds = %bb46
+ store i8 23, ptr %_0, align 1
+ br label %bb105
+
+bb48: ; preds = %bb46
+ %25 = icmp eq i8 %x, 24
+ br i1 %25, label %bb49, label %bb50
+
+bb49: ; preds = %bb48
+ store i8 24, ptr %_0, align 1
+ br label %bb105
+
+bb50: ; preds = %bb48
+ %26 = icmp eq i8 %x, 25
+ br i1 %26, label %bb51, label %bb52
+
+bb51: ; preds = %bb50
+ store i8 25, ptr %_0, align 1
+ br label %bb105
+
+bb52: ; preds = %bb50
+ %27 = icmp eq i8 %x, 26
+ br i1 %27, label %bb53, label %bb54
+
+bb53: ; preds = %bb52
+ store i8 26, ptr %_0, align 1
+ br label %bb105
+
+bb54: ; preds = %bb52
+ %28 = icmp eq i8 %x, 27
+ br i1 %28, label %bb55, label %bb56
+
+bb55: ; preds = %bb54
+ store i8 27, ptr %_0, align 1
+ br label %bb105
+
+bb56: ; preds = %bb54
+ %29 = icmp eq i8 %x, 28
+ br i1 %29, label %bb57, label %bb58
+
+bb57: ; preds = %bb56
+ store i8 28, ptr %_0, align 1
+ br label %bb105
+
+bb58: ; preds = %bb56
+ %30 = icmp eq i8 %x, 29
+ br i1 %30, label %bb59, label %bb60
+
+bb59: ; preds = %bb58
+ store i8 29, ptr %_0, align 1
+ br label %bb105
+
+bb60: ; preds = %bb58
+ %31 = icmp eq i8 %x, 30
+ br i1 %31, label %bb61, label %bb62
+
+bb61: ; preds = %bb60
+ store i8 30, ptr %_0, align 1
+ br label %bb105
+
+bb62: ; preds = %bb60
+ %32 = icmp eq i8 %x, 31
+ br i1 %32, label %bb63, label %bb64
+
+bb63: ; preds = %bb62
+ store i8 31, ptr %_0, align 1
+ br label %bb105
+
+bb64: ; preds = %bb62
+ %33 = icmp eq i8 %x, 32
+ br i1 %33, label %bb65, label %bb66
+
+bb65: ; preds = %bb64
+ store i8 32, ptr %_0, align 1
+ br label %bb105
+
+bb66: ; preds = %bb64
+ %34 = icmp eq i8 %x, 33
+ br i1 %34, label %bb67, label %bb68
+
+bb67: ; preds = %bb66
+ store i8 33, ptr %_0, align 1
+ br label %bb105
+
+bb68: ; preds = %bb66
+ %35 = icmp eq i8 %x, 34
+ br i1 %35, label %bb69, label %bb70
+
+bb69: ; preds = %bb68
+ store i8 34, ptr %_0, align 1
+ br label %bb105
+
+bb70: ; preds = %bb68
+ %36 = icmp eq i8 %x, 35
+ br i1 %36, label %bb71, label %bb72
+
+bb71: ; preds = %bb70
+ store i8 35, ptr %_0, align 1
+ br label %bb105
+
+bb72: ; preds = %bb70
+ %37 = icmp eq i8 %x, 36
+ br i1 %37, label %bb73, label %bb74
+
+bb73: ; preds = %bb72
+ store i8 36, ptr %_0, align 1
+ br label %bb105
+
+bb74: ; preds = %bb72
+ %38 = icmp eq i8 %x, 37
+ br i1 %38, label %bb75, label %bb76
+
+bb75: ; preds = %bb74
+ store i8 37, ptr %_0, align 1
+ br label %bb105
+
+bb76: ; preds = %bb74
+ %39 = icmp eq i8 %x, 38
+ br i1 %39, label %bb77, label %bb78
+
+bb77: ; preds = %bb76
+ store i8 38, ptr %_0, align 1
+ br label %bb105
+
+bb78: ; preds = %bb76
+ %40 = icmp eq i8 %x, 39
+ br i1 %40, label %bb79, label %bb80
+
+bb79: ; preds = %bb78
+ store i8 39, ptr %_0, align 1
+ br label %bb105
+
+bb80: ; preds = %bb78
+ %41 = icmp eq i8 %x, 40
+ br i1 %41, label %bb81, label %bb82
+
+bb81: ; preds = %bb80
+ store i8 40, ptr %_0, align 1
+ br label %bb105
+
+bb82: ; preds = %bb80
+ %42 = icmp eq i8 %x, 41
+ br i1 %42, label %bb83, label %bb84
+
+bb83: ; preds = %bb82
+ store i8 41, ptr %_0, align 1
+ br label %bb105
+
+bb84: ; preds = %bb82
+ %43 = icmp eq i8 %x, 42
+ br i1 %43, label %bb85, label %bb86
+
+bb85: ; preds = %bb84
+ store i8 42, ptr %_0, align 1
+ br label %bb105
+
+bb86: ; preds = %bb84
+ %44 = icmp eq i8 %x, 43
+ br i1 %44, label %bb87, label %bb88
+
+bb87: ; preds = %bb86
+ store i8 43, ptr %_0, align 1
+ br label %bb105
+
+bb88: ; preds = %bb86
+ %45 = icmp eq i8 %x, 44
+ br i1 %45, label %bb89, label %bb90
+
+bb89: ; preds = %bb88
+ store i8 44, ptr %_0, align 1
+ br label %bb105
+
+bb90: ; preds = %bb88
+ %46 = icmp eq i8 %x, 45
+ br i1 %46, label %bb91, label %bb92
+
+bb91: ; preds = %bb90
+ store i8 45, ptr %_0, align 1
+ br label %bb105
+
+bb92: ; preds = %bb90
+ %47 = icmp eq i8 %x, 46
+ br i1 %47, label %bb93, label %bb94
+
+bb93: ; preds = %bb92
+ store i8 46, ptr %_0, align 1
+ br label %bb105
+
+bb94: ; preds = %bb92
+ %48 = icmp eq i8 %x, 47
+ br i1 %48, label %bb95, label %bb96
+
+bb95: ; preds = %bb94
+ store i8 47, ptr %_0, align 1
+ br label %bb105
+
+bb96: ; preds = %bb94
+ %49 = icmp eq i8 %x, 48
+ br i1 %49, label %bb97, label %bb98
+
+bb97: ; preds = %bb96
+ store i8 48, ptr %_0, align 1
+ br label %bb105
+
+bb98: ; preds = %bb96
+ %50 = icmp eq i8 %x, 49
+ br i1 %50, label %bb99, label %bb100
+
+bb99: ; preds = %bb98
+ store i8 49, ptr %_0, align 1
+ br label %bb105
+
+bb100: ; preds = %bb98
+ %51 = icmp eq i8 %x, 50
+ br i1 %51, label %bb101, label %bb102
+
+bb101: ; preds = %bb100
+ store i8 50, ptr %_0, align 1
+ br label %bb105
+
+bb102: ; preds = %bb100
+ %52 = icmp eq i8 %x, 51
+ br i1 %52, label %bb103, label %bb104
+
+bb103: ; preds = %bb102
+ store i8 51, ptr %_0, align 1
+ br label %bb105
+
+bb104: ; preds = %bb102
+ store i8 52, ptr %_0, align 1
+ br label %bb105
+}
+
+; Make sure the call is inlined.
+define i8 @test2(i8 %x) {
+; CHECK-LABEL: define range(i8 0, 53) i8 @test2(
+; CHECK-SAME: i8 [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[CALL:%.*]] = tail call range(i8 0, 53) i8 @llvm.umin.i8(i8 [[X]], i8 52)
+; CHECK-NEXT: ret i8 [[CALL]]
+;
+ %call = call i8 @test(i8 %x)
+ ret i8 %call
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
index cf62fd5..a8880274 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
@@ -4,21 +4,14 @@
define void @test_add_sdiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: @test_add_sdiv(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 2
-; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3
-; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP2_2]], align 4
-; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP2_3]], align 4
-; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
-; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR2]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]]
-; CHECK-NEXT: [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]]
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP0]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]]
; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR3:%.*]], align 4
; CHECK-NEXT: ret void
@@ -58,21 +51,14 @@ entry:
define void @test_add_udiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: @test_add_udiv(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 2
-; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3
-; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4
-; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4
-; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
-; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]]
-; CHECK-NEXT: [[RES2:%.*]] = udiv i32 [[V2]], [[Y2]]
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP0]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]]
; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR2:%.*]], align 4
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll b/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll
index c9063d3..25267dc 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes='simplifycfg' < %s | FileCheck %s --check-prefix=OPTNOLUT
+; RUN: opt -S -passes='simplifycfg<switch-to-arithmetic>' < %s | FileCheck %s --check-prefix=OPTNOLUT
; RUN: %if amdgpu-registered-target %{ opt -mtriple=amdgcn--amdpal -S -passes='simplifycfg<switch-to-lookup>' < %s | FileCheck %s --check-prefix=TTINOLUT %}
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -7,23 +7,11 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define i32 @linear_transform_with_default(i32 %x) {
; OPTNOLUT-LABEL: define i32 @linear_transform_with_default(
; OPTNOLUT-SAME: i32 [[X:%.*]]) {
-; OPTNOLUT-NEXT: [[ENTRY:.*]]:
-; OPTNOLUT-NEXT: switch i32 [[X]], label %[[END:.*]] [
-; OPTNOLUT-NEXT: i32 0, label %[[CASE0:.*]]
-; OPTNOLUT-NEXT: i32 1, label %[[CASE1:.*]]
-; OPTNOLUT-NEXT: i32 2, label %[[CASE2:.*]]
-; OPTNOLUT-NEXT: i32 3, label %[[CASE3:.*]]
-; OPTNOLUT-NEXT: ]
-; OPTNOLUT: [[CASE0]]:
-; OPTNOLUT-NEXT: br label %[[END]]
-; OPTNOLUT: [[CASE1]]:
-; OPTNOLUT-NEXT: br label %[[END]]
-; OPTNOLUT: [[CASE2]]:
-; OPTNOLUT-NEXT: br label %[[END]]
-; OPTNOLUT: [[CASE3]]:
-; OPTNOLUT-NEXT: br label %[[END]]
-; OPTNOLUT: [[END]]:
-; OPTNOLUT-NEXT: [[IDX:%.*]] = phi i32 [ 1, %[[CASE0]] ], [ 4, %[[CASE1]] ], [ 7, %[[CASE2]] ], [ 10, %[[CASE3]] ], [ 13, %[[ENTRY]] ]
+; OPTNOLUT-NEXT: [[ENTRY:.*:]]
+; OPTNOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 4
+; OPTNOLUT-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[X]], 3
+; OPTNOLUT-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i32 [[SWITCH_IDX_MULT]], 1
+; OPTNOLUT-NEXT: [[IDX:%.*]] = select i1 [[TMP0]], i32 [[SWITCH_OFFSET]], i32 13
; OPTNOLUT-NEXT: ret i32 [[IDX]]
;
; TTINOLUT-LABEL: define i32 @linear_transform_with_default(
@@ -138,26 +126,8 @@ end:
define i32 @linear_transform_no_default(i32 %x) {
; OPTNOLUT-LABEL: define i32 @linear_transform_no_default(
; OPTNOLUT-SAME: i32 [[X:%.*]]) {
-; OPTNOLUT-NEXT: [[ENTRY:.*]]:
-; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [
-; OPTNOLUT-NEXT: i32 0, label %[[END:.*]]
-; OPTNOLUT-NEXT: i32 1, label %[[CASE1:.*]]
-; OPTNOLUT-NEXT: i32 2, label %[[CASE2:.*]]
-; OPTNOLUT-NEXT: i32 3, label %[[CASE3:.*]]
-; OPTNOLUT-NEXT: i32 4, label %[[CASE4:.*]]
-; OPTNOLUT-NEXT: ]
-; OPTNOLUT: [[CASE1]]:
-; OPTNOLUT-NEXT: br label %[[END]]
-; OPTNOLUT: [[CASE2]]:
-; OPTNOLUT-NEXT: br label %[[END]]
-; OPTNOLUT: [[CASE3]]:
-; OPTNOLUT-NEXT: br label %[[END]]
-; OPTNOLUT: [[CASE4]]:
-; OPTNOLUT-NEXT: br label %[[END]]
-; OPTNOLUT: [[DEFAULT]]:
-; OPTNOLUT-NEXT: unreachable
-; OPTNOLUT: [[END]]:
-; OPTNOLUT-NEXT: [[SWITCH_IDX_MULT:%.*]] = phi i32 [ 3, %[[CASE1]] ], [ 6, %[[CASE2]] ], [ 9, %[[CASE3]] ], [ 12, %[[CASE4]] ], [ 0, %[[ENTRY]] ]
+; OPTNOLUT-NEXT: [[ENTRY:.*:]]
+; OPTNOLUT-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[X]], 3
; OPTNOLUT-NEXT: ret i32 [[SWITCH_IDX_MULT]]
;
; TTINOLUT-LABEL: define i32 @linear_transform_no_default(
@@ -350,18 +320,9 @@ end:
define i32 @single_value_withdefault(i32 %x) {
; OPTNOLUT-LABEL: define i32 @single_value_withdefault(
; OPTNOLUT-SAME: i32 [[X:%.*]]) {
-; OPTNOLUT-NEXT: [[ENTRY:.*]]:
-; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [
-; OPTNOLUT-NEXT: i32 0, label %[[END:.*]]
-; OPTNOLUT-NEXT: i32 1, label %[[END]]
-; OPTNOLUT-NEXT: i32 2, label %[[END]]
-; OPTNOLUT-NEXT: i32 3, label %[[END]]
-; OPTNOLUT-NEXT: i32 4, label %[[END]]
-; OPTNOLUT-NEXT: ]
-; OPTNOLUT: [[DEFAULT]]:
-; OPTNOLUT-NEXT: br label %[[END]]
-; OPTNOLUT: [[END]]:
-; OPTNOLUT-NEXT: [[DOT:%.*]] = phi i32 [ 3, %[[DEFAULT]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ]
+; OPTNOLUT-NEXT: [[ENTRY:.*:]]
+; OPTNOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 5
+; OPTNOLUT-NEXT: [[DOT:%.*]] = select i1 [[TMP0]], i32 2, i32 3
; OPTNOLUT-NEXT: ret i32 [[DOT]]
;
; TTINOLUT-LABEL: define i32 @single_value_withdefault(
@@ -401,18 +362,9 @@ end:
define i32 @single_value_no_jump_tables(i32 %x) "no-jump-tables"="true" {
; OPTNOLUT-LABEL: define i32 @single_value_no_jump_tables(
; OPTNOLUT-SAME: i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
-; OPTNOLUT-NEXT: [[ENTRY:.*]]:
-; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [
-; OPTNOLUT-NEXT: i32 0, label %[[END:.*]]
-; OPTNOLUT-NEXT: i32 1, label %[[END]]
-; OPTNOLUT-NEXT: i32 2, label %[[END]]
-; OPTNOLUT-NEXT: i32 3, label %[[END]]
-; OPTNOLUT-NEXT: i32 4, label %[[END]]
-; OPTNOLUT-NEXT: ]
-; OPTNOLUT: [[DEFAULT]]:
-; OPTNOLUT-NEXT: br label %[[END]]
-; OPTNOLUT: [[END]]:
-; OPTNOLUT-NEXT: [[IDX:%.*]] = phi i32 [ 3, %[[DEFAULT]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ]
+; OPTNOLUT-NEXT: [[ENTRY:.*:]]
+; OPTNOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 5
+; OPTNOLUT-NEXT: [[IDX:%.*]] = select i1 [[TMP0]], i32 2, i32 3
; OPTNOLUT-NEXT: ret i32 [[IDX]]
;
; TTINOLUT-LABEL: define i32 @single_value_no_jump_tables(
@@ -449,6 +401,60 @@ end:
ret i32 %idx
}
+define i1 @single_value_with_mask(i32 %x) {
+; OPTNOLUT-LABEL: define i1 @single_value_with_mask(
+; OPTNOLUT-SAME: i32 [[X:%.*]]) {
+; OPTNOLUT-NEXT: [[ENTRY:.*]]:
+; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [
+; OPTNOLUT-NEXT: i32 18, label %[[END:.*]]
+; OPTNOLUT-NEXT: i32 21, label %[[END]]
+; OPTNOLUT-NEXT: i32 48, label %[[END]]
+; OPTNOLUT-NEXT: i32 16, label %[[END]]
+; OPTNOLUT-NEXT: ]
+; OPTNOLUT: [[DEFAULT]]:
+; OPTNOLUT-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 80
+; OPTNOLUT-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i1 false, i1 true
+; OPTNOLUT-NEXT: br label %[[END]]
+; OPTNOLUT: [[END]]:
+; OPTNOLUT-NEXT: [[RES:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ [[SEL]], %[[DEFAULT]] ]
+; OPTNOLUT-NEXT: ret i1 [[RES]]
+;
+; TTINOLUT-LABEL: define i1 @single_value_with_mask(
+; TTINOLUT-SAME: i32 [[X:%.*]]) {
+; TTINOLUT-NEXT: [[ENTRY:.*]]:
+; TTINOLUT-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i32 [[X]], 16
+; TTINOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 33
+; TTINOLUT-NEXT: [[SWITCH_MASKINDEX:%.*]] = zext i32 [[SWITCH_TABLEIDX]] to i64
+; TTINOLUT-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i64 4294967333, [[SWITCH_MASKINDEX]]
+; TTINOLUT-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i64 [[SWITCH_SHIFTED]] to i1
+; TTINOLUT-NEXT: [[OR_COND:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_LOBIT]], i1 false
+; TTINOLUT-NEXT: br i1 [[OR_COND]], label %[[END:.*]], label %[[DEFAULT:.*]]
+; TTINOLUT: [[DEFAULT]]:
+; TTINOLUT-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 80
+; TTINOLUT-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i1 false, i1 true
+; TTINOLUT-NEXT: br label %[[END]]
+; TTINOLUT: [[END]]:
+; TTINOLUT-NEXT: [[RES:%.*]] = phi i1 [ [[SEL]], %[[DEFAULT]] ], [ false, %[[ENTRY]] ]
+; TTINOLUT-NEXT: ret i1 [[RES]]
+;
+entry:
+ switch i32 %x, label %default [
+ i32 18, label %end
+ i32 21, label %end
+ i32 48, label %end
+ i32 16, label %end
+ ]
+
+default:
+ %cmp = icmp eq i32 %x, 80
+ %sel = select i1 %cmp, i1 false, i1 true
+ br label %end
+
+end:
+ %res = phi i1 [ false, %entry ], [ false, %entry ], [ false, %entry ], [ false, %entry ], [ %sel, %default ]
+ ret i1 %res
+}
+
define i32 @lookup_table(i32 %x) {
; OPTNOLUT-LABEL: define i32 @lookup_table(
; OPTNOLUT-SAME: i32 [[X:%.*]]) {
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s
index bdc02d4..a540d7d 100644
--- a/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s
+++ b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s
@@ -4,7 +4,7 @@ AMOAND_D: ---
AMOAND_D-NEXT: mode: latency
AMOAND_D-NEXT: key:
AMOAND_D-NEXT: instructions:
-AMOAND_D-NEXT: - 'AMOAND_D [[RE01:X[0-9]+]] X10 [[RE01:X[0-9]+]]'
+AMOAND_D-NEXT: - 'AMOAND_D [[RE01:X[0-9]+]] [[RE01:X[0-9]+]] X10'
AMOAND_D-NEXT: config: ''
AMOAND_D-NEXT: register_initial_values:
AMOAND_D-NEXT: - '[[RE01:X[0-9]+]]=0x0'
@@ -16,7 +16,7 @@ AMOADD_W: ---
AMOADD_W-NEXT: mode: latency
AMOADD_W-NEXT: key:
AMOADD_W-NEXT: instructions:
-AMOADD_W-NEXT: - 'AMOADD_W [[RE02:X[0-9]+]] X10 [[RE02:X[0-9]+]]'
+AMOADD_W-NEXT: - 'AMOADD_W [[RE02:X[0-9]+]] [[RE02:X[0-9]+]] X10'
AMOADD_W-NEXT: config: ''
AMOADD_W-NEXT: register_initial_values:
AMOADD_W-NEXT: - '[[RE02:X[0-9]+]]=0x0'
@@ -28,7 +28,7 @@ AMOMAXU_D: ---
AMOMAXU_D-NEXT: mode: latency
AMOMAXU_D-NEXT: key:
AMOMAXU_D-NEXT: instructions:
-AMOMAXU_D-NEXT: - 'AMOMAXU_D [[RE03:X[0-9]+]] X10 [[RE03:X[0-9]+]]'
+AMOMAXU_D-NEXT: - 'AMOMAXU_D [[RE03:X[0-9]+]] [[RE03:X[0-9]+]] X10'
AMOMAXU_D-NEXT: config: ''
AMOMAXU_D-NEXT: register_initial_values:
AMOMAXU_D-NEXT: - '[[RE03:X[0-9]+]]=0x0'
@@ -40,7 +40,7 @@ AMOMIN_W: ---
AMOMIN_W-NEXT: mode: latency
AMOMIN_W-NEXT: key:
AMOMIN_W-NEXT: instructions:
-AMOMIN_W-NEXT: - 'AMOMIN_W [[RE04:X[0-9]+]] X10 [[RE04:X[0-9]+]]'
+AMOMIN_W-NEXT: - 'AMOMIN_W [[RE04:X[0-9]+]] [[RE04:X[0-9]+]] X10'
AMOMIN_W-NEXT: config: ''
AMOMIN_W-NEXT: register_initial_values:
AMOMIN_W-NEXT: - '[[RE04:X[0-9]+]]=0x0'
@@ -52,7 +52,7 @@ AMOXOR_D: ---
AMOXOR_D-NEXT: mode: latency
AMOXOR_D-NEXT: key:
AMOXOR_D-NEXT: instructions:
-AMOXOR_D-NEXT: - 'AMOXOR_D [[RE05:X[0-9]+]] X10 [[RE05:X[0-9]+]]'
+AMOXOR_D-NEXT: - 'AMOXOR_D [[RE05:X[0-9]+]] [[RE05:X[0-9]+]] X10'
AMOXOR_D-NEXT: config: ''
AMOXOR_D-NEXT: register_initial_values:
AMOXOR_D-NEXT: - '[[RE05:X[0-9]+]]=0x0'