310 files changed, 52437 insertions, 15502 deletions
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll
index b1fe7b1..7ba422d 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll
@@ -615,22 +615,14 @@ define void @test_ptrs_aligned_by_4_via_assumption(ptr %start, ptr %end) {
 ; CHECK-LABEL: 'test_ptrs_aligned_by_4_via_assumption'
 ; CHECK-NEXT:  Classifying expressions for: @test_ptrs_aligned_by_4_via_assumption
 ; CHECK-NEXT:    %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = getelementptr i8, ptr %iv, i64 4
-; CHECK-NEXT:    --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_ptrs_aligned_by_4_via_assumption
-; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
-; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
-; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
-; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
-; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
-; CHECK-NEXT:  Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903
-; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
-; CHECK-NEXT:  Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
-; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT:  Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
+; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i64 4611686018427387903
+; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
+; CHECK-NEXT:  Loop %loop: Trip multiple is 1
 ;
 entry:
   call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 4) ]
@@ -652,22 +644,14 @@ define void @test_ptrs_aligned_by_8_via_assumption(ptr %start, ptr %end) {
 ; CHECK-LABEL: 'test_ptrs_aligned_by_8_via_assumption'
 ; CHECK-NEXT:  Classifying expressions for: @test_ptrs_aligned_by_8_via_assumption
 ; CHECK-NEXT:    %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = getelementptr i8, ptr %iv, i64 4
-; CHECK-NEXT:    --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_ptrs_aligned_by_8_via_assumption
-; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
-; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
-; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
-; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
-; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
-; CHECK-NEXT:  Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903
-; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
-; CHECK-NEXT:  Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
-; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT:  Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
+; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i64 4611686018427387903
+; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
+; CHECK-NEXT:  Loop %loop: Trip multiple is 1
 ;
 entry:
   call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ]
diff --git a/llvm/test/Assembler/ConstantExprFold.ll b/llvm/test/Assembler/ConstantExprFold.ll
index 840ed06..33ee492 100644
--- a/llvm/test/Assembler/ConstantExprFold.ll
+++ b/llvm/test/Assembler/ConstantExprFold.ll
@@ -30,9 +30,9 @@
 ; Need a function to make update_test_checks.py work.
 ;.
 ; CHECK: @A = global i64 0
-; CHECK: @add = global ptr @A
-; CHECK: @sub = global ptr @A
-; CHECK: @xor = global ptr @A
+; CHECK: @add = global ptr inttoptr (i64 ptrtoint (ptr @A to i64) to ptr)
+; CHECK: @sub = global ptr inttoptr (i64 ptrtoint (ptr @A to i64) to ptr)
+; CHECK: @xor = global ptr inttoptr (i64 ptrtoint (ptr @A to i64) to ptr)
 ; CHECK: @B = external global %Ty
 ; CHECK: @cons = weak global i32 0, align 8
 ; CHECK: @gep1 = global <2 x ptr> undef
diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll
index 8c1a763..aef7810 100644
--- a/llvm/test/Bitcode/attributes.ll
+++ b/llvm/test/Bitcode/attributes.ll
@@ -516,6 +516,11 @@ define void @f93() sanitize_realtime_blocking {
         ret void;
 }
 
+; CHECK: define void @f_sanitize_alloc_token() #55
+define void @f_sanitize_alloc_token() sanitize_alloc_token {
+        ret void;
+}
+
 ; CHECK: define void @f87() [[FNRETTHUNKEXTERN:#[0-9]+]]
 define void @f87() fn_ret_thunk_extern { ret void }
 
@@ -627,6 +632,7 @@ define void @dead_on_return(ptr dead_on_return %p) {
 ; CHECK: attributes #52 = { nosanitize_bounds }
 ; CHECK: attributes #53 = { sanitize_realtime }
 ; CHECK: attributes #54 = { sanitize_realtime_blocking }
+; CHECK: attributes #55 = { sanitize_alloc_token }
 ; CHECK: attributes [[FNRETTHUNKEXTERN]] = { fn_ret_thunk_extern }
 ; CHECK: attributes [[SKIPPROFILE]] = { skipprofile }
 ; CHECK: attributes [[OPTDEBUG]] = { optdebug }
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index 0b5ce08..e21786e 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -1718,7 +1718,7 @@ exit:
   ; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
 
   call void @f.nobuiltin() builtin
-  ; CHECK: call void @f.nobuiltin() #54
+  ; CHECK: call void @f.nobuiltin() #55
 
   call fastcc noalias ptr @f.noalias() noinline
   ; CHECK: call fastcc noalias ptr @f.noalias() #12
@@ -2151,6 +2151,9 @@ declare void @f.sanitize_realtime() sanitize_realtime
 declare void @f.sanitize_realtime_blocking() sanitize_realtime_blocking
 ; CHECK: declare void @f.sanitize_realtime_blocking() #53
 
+declare void @f.sanitize_alloc_token() sanitize_alloc_token
+; CHECK: declare void @f.sanitize_alloc_token() #54
+
 ; CHECK: declare nofpclass(snan) float @nofpclass_snan(float nofpclass(snan))
 declare nofpclass(snan) float @nofpclass_snan(float nofpclass(snan))
 
@@ -2284,7 +2287,8 @@ define float @nofpclass_callsites(float %arg, { float } %arg1) {
 ; CHECK: attributes #51 = { sanitize_numerical_stability }
 ; CHECK: attributes #52 = { sanitize_realtime }
 ; CHECK: attributes #53 = { sanitize_realtime_blocking }
-; CHECK: attributes #54 = { builtin }
+; CHECK: attributes #54 = { sanitize_alloc_token }
+; CHECK: attributes #55 = { builtin }
 
 ;; Metadata
 
diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index e810fcb6..f01422e 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -123,6 +123,7 @@ set(LLVM_TEST_DEPENDS
   llvm-objdump
   llvm-opt-fuzzer
   llvm-opt-report
+  llvm-offload-binary
   llvm-offload-wrapper
   llvm-otool
   llvm-pdbutil
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
index 6362ed6..9381f0f4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
@@ -1,11 +1,12 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
-# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+# RUN: llc -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
 ...
 ---
 name:            fconstant_to_constant_s32
 alignment:       4
 tracksRegLiveness: true
+legalized:       true
 frameInfo:
   maxAlignment:    1
 machineFunctionInfo: {}
@@ -24,16 +25,17 @@ body:             |
     ; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
     ; CHECK-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x0
-    %3:_(s32) = G_FCONSTANT float 0x3FA99999A0000000
-    %1:_(s64) = G_CONSTANT i64 524
-    %2:_(p0) = G_PTR_ADD %0, %1(s64)
-    G_STORE %3(s32), %2(p0) :: (store (s32))
+    %1:_(s32) = G_FCONSTANT float 0x3FA99999A0000000
+    %2:_(s64) = G_CONSTANT i64 524
+    %3:_(p0) = G_PTR_ADD %0, %2(s64)
+    G_STORE %1(s32), %3(p0) :: (store (s32))
     RET_ReallyLR
 ...
 ---
 name:            fconstant_to_constant_s64
 alignment:       4
 tracksRegLiveness: true
+legalized:       true
 frameInfo:
   maxAlignment:    1
 machineFunctionInfo: {}
@@ -48,7 +50,7 @@ body:             |
     ; CHECK-NEXT: G_STORE %c(s64), %ptr(p0) :: (store (s64))
     ; CHECK-NEXT: RET_ReallyLR
     %ptr:_(p0) = COPY $x0
-    %c:_(s64) = G_FCONSTANT double 0.0
+    %c:_(s64) = G_FCONSTANT double 0.000000e+00
     G_STORE %c(s64), %ptr(p0) :: (store (s64))
     RET_ReallyLR
 ...
@@ -56,6 +58,7 @@ body:             |
 name:            no_store_means_no_combine
 alignment:       4
 tracksRegLiveness: true
+legalized:       true
 frameInfo:
   maxAlignment:    1
 machineFunctionInfo: {}
@@ -71,7 +74,7 @@ body:             |
     ; CHECK-NEXT: %add:_(s64) = G_FADD %v, %c
     ; CHECK-NEXT: RET_ReallyLR implicit %add(s64)
     %v:_(s64) = COPY $x0
-    %c:_(s64) = G_FCONSTANT double 0.0
+    %c:_(s64) = G_FCONSTANT double 0.000000e+00
     %add:_(s64) = G_FADD %v, %c
-    RET_ReallyLR implicit %add
+    RET_ReallyLR implicit %add(s64)
 ...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
index c301e76..c00ce22 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
@@ -48,8 +48,9 @@ body: |
     ; CHECK-NEXT: $w0 = COPY [[C]](s32)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
     ; CHECK-NEXT: $x0 = COPY [[C1]](s64)
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: $w0 = COPY [[C2]](s32)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16)
+    ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = G_FCONSTANT float 1.0
     $w0 = COPY %0
     %1:_(s64) = G_FCONSTANT double 2.0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
index ddf219d..c6df345 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
@@ -8,7 +8,7 @@ tracksRegLiveness: true
 body: |
   bb.0:
     ; NO-FP16-LABEL: name: fp16
-    ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 0
+    ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH0000
     ; NO-FP16-NEXT: $h0 = COPY %cst(s16)
     ; NO-FP16-NEXT: RET_ReallyLR implicit $h0
     ;
@@ -26,7 +26,7 @@ tracksRegLiveness: true
 body: |
   bb.0:
     ; NO-FP16-LABEL: name: fp16_non_zero
-    ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 16384
+    ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH4000
     ; NO-FP16-NEXT: $h0 = COPY %cst(s16)
     ; NO-FP16-NEXT: RET_ReallyLR implicit $h0
     ;
@@ -44,7 +44,7 @@ tracksRegLiveness: true
 body:             |
   bb.1.entry:
     ; NO-FP16-LABEL: name: nan
-    ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 31745
+    ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH7C01
     ; NO-FP16-NEXT: %ext:_(s32) = G_FPEXT %cst(s16)
     ; NO-FP16-NEXT: $w0 = COPY %ext(s32)
     ; NO-FP16-NEXT: RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index cb5df07..322a96a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -739,15 +739,14 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
 ;
 ; GISEL-LABEL: postidx32_shalf:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #0 ; =0x0
-; GISEL-NEXT:    ldr h1, [x0], #4
-; GISEL-NEXT:    fmov s2, w8
+; GISEL-NEXT:    movi d1, #0000000000000000
+; GISEL-NEXT:    ldr h2, [x0], #4
 ; GISEL-NEXT:    ; kill: def $h0 killed $h0 def $s0
 ; GISEL-NEXT:    fmov w9, s0
-; GISEL-NEXT:    fcvt s3, h1
-; GISEL-NEXT:    fmov w8, s1
-; GISEL-NEXT:    fcvt s2, h2
-; GISEL-NEXT:    fcmp s3, s2
+; GISEL-NEXT:    fcvt s3, h2
+; GISEL-NEXT:    fmov w8, s2
+; GISEL-NEXT:    fcvt s1, h1
+; GISEL-NEXT:    fcmp s3, s1
 ; GISEL-NEXT:    csel w8, w8, w9, mi
 ; GISEL-NEXT:    strh w8, [x1]
 ; GISEL-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir b/llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir
new file mode 100644
index 0000000..074f75a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir
@@ -0,0 +1,50 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=aarch64 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name:            saddlp1d
+legalized:       true
+regBankSelected: false
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: saddlp1d
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:fpr(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+    ; CHECK-NEXT: [[SADDLP:%[0-9]+]]:fpr(s64) = G_SADDLP [[LOAD]]
+    ; CHECK-NEXT: $d0 = COPY [[SADDLP]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:_(p0) = COPY $x0
+    %1:_(<2 x s32>) = G_LOAD %0(p0) :: (load (<2 x s32>))
+    %2:_(s64) = G_SADDLP %1
+    $d0 = COPY %2(s64)
+    RET_ReallyLR implicit $d0
+...
+---
+name:            uaddlp1d
+legalized:       true
+regBankSelected: false
+failedISel:      false
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: uaddlp1d
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:fpr(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+    ; CHECK-NEXT: [[UADDLP:%[0-9]+]]:fpr(s64) = G_UADDLP [[LOAD]]
+    ; CHECK-NEXT: $d0 = COPY [[UADDLP]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:_(p0) = COPY $x0
+    %1:_(<2 x s32>) = G_LOAD %0(p0) :: (load (<2 x s32>))
+    %2:_(s64) = G_UADDLP %1
+    $d0 = COPY %2(s64)
+    RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-vadd.ll b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
index 938712a..3cf0115 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
@@ -1,9 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=arm64-eabi -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI:       warning: Instruction selection used fallback path for saddlp1d
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uaddlp1d
+; RUN: llc < %s -mtriple=arm64-eabi -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define <8 x i8> @addhn8b(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: addhn8b:
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
index f829227..dc35224 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
@@ -563,6 +563,41 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
 ; CHECK-NEXT:     .seh_endfunclet
 ; CHECK-NEXT:     .seh_endproc
 
+declare void @"??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@"()
+; CHECK-LABEL:       .def    "??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@";
+; CHECK-NEXT:        .scl    2;
+; CHECK-NEXT:        .type   32;
+; CHECK-NEXT:        .endef
+; CHECK-NEXT:        .section        .wowthk$aa,"xr",discard,"??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@"
+; CHECK-NEXT:        .globl  "??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@" // -- Begin function ??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@
+; CHECK-NEXT:        .p2align        2
+; CHECK-NEXT: "??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@": // @"??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@"
+; CHECK-NEXT:         .weak_anti_dep  "??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@"
+; CHECK-NEXT: "??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@" = "??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@"
+; CHECK-NEXT:         .weak_anti_dep  "??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@"
+; CHECK-NEXT: "??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@" = "??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@"
+; CHECK-NEXT: .seh_proc "??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@"
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT:         str     x30, [sp, #-16]!                // 8-byte Folded Spill
+; CHECK-NEXT:         .seh_save_reg_x x30, 16
+; CHECK-NEXT:         .seh_endprologue
+; CHECK-NEXT:         adrp    x8, __os_arm64x_check_icall
+; CHECK-NEXT:         adrp    x11, "??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@"
+; CHECK-NEXT:         add     x11, x11, :lo12:"??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@"
+; CHECK-NEXT:         ldr     x8, [x8, :lo12:__os_arm64x_check_icall]
+; CHECK-NEXT:         adrp    x10, $iexit_thunk$cdecl$v$v
+; CHECK-NEXT:         add     x10, x10, :lo12:$iexit_thunk$cdecl$v$v
+; CHECK-NEXT:         blr     x8
+; CHECK-NEXT:         .seh_startepilogue
+; CHECK-NEXT:         ldr     x30, [sp], #16                  // 8-byte Folded Reload
+; CHECK-NEXT:         .seh_save_reg_x x30, 16
+; CHECK-NEXT:         .seh_endepilogue
+; CHECK-NEXT:         br      x11
+; CHECK-NEXT:         .seh_endfunclet
+; CHECK-NEXT:         .seh_endproc
+
+
+
 ; CHECK-LABEL:    .section        .hybmp$x,"yi"
 ; CHECK-NEXT:     .symidx "#func_caller"
 ; CHECK-NEXT:     .symidx $ientry_thunk$cdecl$v$v
@@ -633,6 +668,12 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
 ; CHECK-NEXT:     .symidx	"#large_vector$exit_thunk"
 ; CHECK-NEXT:     .symidx	large_vector
 ; CHECK-NEXT:     .word	0
+; CHECK-NEXT:     .symidx "??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@"
+; CHECK-NEXT:     .symidx $iexit_thunk$cdecl$v$v
+; CHECK-NEXT:     .word   4
+; CHECK-NEXT:     .symidx "??$exit_thunk@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@$$h@"
+; CHECK-NEXT:     .symidx "??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@"
+; CHECK-NEXT:     .word   0
 
 define void @func_caller() nounwind {
   call void @no_op()
@@ -649,5 +690,6 @@ define void @func_caller() nounwind {
   call %T2 @simple_struct(%T1 { i16 0 }, %T2 { i32 0, float 0.0 }, %T3 { i64 0, double 0.0 }, %T4 { i64 0, double 0.0, i8 0 })
   call <4 x i8> @small_vector(<4 x i8> <i8 0, i8 0, i8 0, i8 0>)
   call <8 x i16> @large_vector(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+  call void @"??@md5mangleaaaaaaaaaaaaaaaaaaaaaaa@"()
   ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index adc536d..b234ef7 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -782,18 +782,19 @@ define void @test_fccmp(half %in, ptr %out) {
 ;
 ; CHECK-CVT-GI-LABEL: test_fccmp:
 ; CHECK-CVT-GI:       // %bb.0:
-; CHECK-CVT-GI-NEXT:    mov w8, #17664 // =0x4500
-; CHECK-CVT-GI-NEXT:    mov w9, #18432 // =0x4800
+; CHECK-CVT-GI-NEXT:    adrp x8, .LCPI29_0
 ; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-GI-NEXT:    fcvt s2, h0
-; CHECK-CVT-GI-NEXT:    fmov s1, w8
-; CHECK-CVT-GI-NEXT:    fmov s3, w9
-; CHECK-CVT-GI-NEXT:    fmov w9, s0
-; CHECK-CVT-GI-NEXT:    fcvt s1, h1
-; CHECK-CVT-GI-NEXT:    fcvt s3, h3
-; CHECK-CVT-GI-NEXT:    fcmp s2, s1
-; CHECK-CVT-GI-NEXT:    fccmp s2, s3, #4, mi
-; CHECK-CVT-GI-NEXT:    csel w8, w9, w8, gt
+; CHECK-CVT-GI-NEXT:    ldr h1, [x8, :lo12:.LCPI29_0]
+; CHECK-CVT-GI-NEXT:    adrp x8, .LCPI29_1
+; CHECK-CVT-GI-NEXT:    ldr h4, [x8, :lo12:.LCPI29_1]
+; CHECK-CVT-GI-NEXT:    fmov w8, s0
+; CHECK-CVT-GI-NEXT:    fcvt s3, h1
+; CHECK-CVT-GI-NEXT:    fmov w9, s1
+; CHECK-CVT-GI-NEXT:    fcvt s4, h4
+; CHECK-CVT-GI-NEXT:    fcmp s2, s3
+; CHECK-CVT-GI-NEXT:    fccmp s2, s4, #4, mi
+; CHECK-CVT-GI-NEXT:    csel w8, w8, w9, gt
 ; CHECK-CVT-GI-NEXT:    strh w8, [x0]
 ; CHECK-CVT-GI-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 51aad4fe..7409bfb 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -166,9 +166,9 @@ define i32 @fcvtzs_f16_i32_7(half %flt) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI8_0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI8_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -206,9 +206,9 @@ define i32 @fcvtzs_f16_i32_15(half %flt) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI9_0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI9_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -246,9 +246,9 @@ define i64 @fcvtzs_f16_i64_7(half %flt) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI10_0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI10_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -286,9 +286,9 @@ define i64 @fcvtzs_f16_i64_15(half %flt) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI11_0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI11_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -470,9 +470,9 @@ define i32 @fcvtzu_f16_i32_7(half %flt) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI20_0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI20_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -510,9 +510,9 @@ define i32 @fcvtzu_f16_i32_15(half %flt) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_15:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI21_0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI21_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -550,9 +550,9 @@ define i64 @fcvtzu_f16_i64_7(half %flt) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_7:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI22_0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI22_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -590,9 +590,9 @@ define i64 @fcvtzu_f16_i64_15(half %flt) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_15:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI23_0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI23_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -775,10 +775,10 @@ define half @scvtf_f16_i32_7(i32 %int) {
 ; CHECK-GI-NO16-LABEL: scvtf_f16_i32_7:
 ; CHECK-GI-NO16:       // %bb.0:
 ; CHECK-GI-NO16-NEXT:    scvtf s0, w0
-; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI32_0
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI32_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -815,10 +815,10 @@ define half @scvtf_f16_i32_15(i32 %int) {
 ; CHECK-GI-NO16-LABEL: scvtf_f16_i32_15:
 ; CHECK-GI-NO16:       // %bb.0:
 ; CHECK-GI-NO16-NEXT:    scvtf s0, w0
-; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI33_0
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI33_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -855,10 +855,10 @@ define half @scvtf_f16_i64_7(i64 %long) {
 ; CHECK-GI-NO16-LABEL: scvtf_f16_i64_7:
 ; CHECK-GI-NO16:       // %bb.0:
 ; CHECK-GI-NO16-NEXT:    scvtf s0, x0
-; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI34_0
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI34_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -895,10 +895,10 @@ define half @scvtf_f16_i64_15(i64 %long) {
 ; CHECK-GI-NO16-LABEL: scvtf_f16_i64_15:
 ; CHECK-GI-NO16:       // %bb.0:
 ; CHECK-GI-NO16-NEXT:    scvtf s0, x0
-; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI35_0
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI35_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -1079,10 +1079,10 @@ define half @ucvtf_f16_i32_7(i32 %int) {
 ; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_7:
 ; CHECK-GI-NO16:       // %bb.0:
 ; CHECK-GI-NO16-NEXT:    ucvtf s0, w0
-; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI44_0
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI44_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -1119,10 +1119,10 @@ define half @ucvtf_f16_i32_15(i32 %int) {
 ; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_15:
 ; CHECK-GI-NO16:       // %bb.0:
 ; CHECK-GI-NO16-NEXT:    ucvtf s0, w0
-; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI45_0
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI45_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -1159,10 +1159,10 @@ define half @ucvtf_f16_i64_7(i64 %long) {
 ; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_7:
 ; CHECK-GI-NO16:       // %bb.0:
 ; CHECK-GI-NO16-NEXT:    ucvtf s0, x0
-; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI46_0
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI46_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -1199,10 +1199,10 @@ define half @ucvtf_f16_i64_15(i64 %long) {
 ; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_15:
 ; CHECK-GI-NO16:       // %bb.0:
 ; CHECK-GI-NO16-NEXT:    ucvtf s0, x0
-; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
-; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI47_0
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI47_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -1373,9 +1373,9 @@ define i32 @fcvtzs_sat_f16_i32_7(half %dbl) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_7:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI55_0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI55_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -1413,9 +1413,9 @@ define i32 @fcvtzs_sat_f16_i32_15(half %dbl) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_15:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI56_0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI56_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -1453,9 +1453,9 @@ define i64 @fcvtzs_sat_f16_i64_7(half %dbl) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_7:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI57_0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI57_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -1493,9 +1493,9 @@ define i64 @fcvtzs_sat_f16_i64_15(half %dbl) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_15:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI58_0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI58_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -1667,9 +1667,9 @@ define i32 @fcvtzu_sat_f16_i32_7(half %dbl) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_7:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI66_0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI66_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -1707,9 +1707,9 @@ define i32 @fcvtzu_sat_f16_i32_15(half %dbl) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_15:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI67_0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI67_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -1747,9 +1747,9 @@ define i64 @fcvtzu_sat_f16_i64_7(half %dbl) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_7:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI68_0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI68_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
@@ -1787,9 +1787,9 @@ define i64 @fcvtzu_sat_f16_i64_15(half %dbl) {
 ;
 ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_15:
 ; CHECK-GI-NO16:       // %bb.0:
-; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    adrp x8, .LCPI69_0
 ; CHECK-GI-NO16-NEXT:    fcvt s0, h0
-; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    ldr h1, [x8, :lo12:.LCPI69_0]
 ; CHECK-GI-NO16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NO16-NEXT:    fcvt h0, s0
diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
index 91bb8ac..9eacb61 100644
--- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
@@ -12,22 +12,14 @@
 ;                =>
 ;   recip = 1.0 / D; a * recip; b * recip; c * recip;
 define void @three_fdiv_float(float %D, float %a, float %b, float %c) {
-; CHECK-SD-LABEL: three_fdiv_float:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fmov s4, #1.00000000
-; CHECK-SD-NEXT:    fdiv s4, s4, s0
-; CHECK-SD-NEXT:    fmul s0, s1, s4
-; CHECK-SD-NEXT:    fmul s1, s2, s4
-; CHECK-SD-NEXT:    fmul s2, s3, s4
-; CHECK-SD-NEXT:    b foo_3f
-;
-; CHECK-GI-LABEL: three_fdiv_float:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fdiv s4, s1, s0
-; CHECK-GI-NEXT:    fdiv s1, s2, s0
-; CHECK-GI-NEXT:    fdiv s2, s3, s0
-; CHECK-GI-NEXT:    fmov s0, s4
-; CHECK-GI-NEXT:    b foo_3f
+; CHECK-LABEL: three_fdiv_float:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov s4, #1.00000000
+; CHECK-NEXT:    fdiv s4, s4, s0
+; CHECK-NEXT:    fmul s0, s1, s4
+; CHECK-NEXT:    fmul s1, s2, s4
+; CHECK-NEXT:    fmul s2, s3, s4
+; CHECK-NEXT:    b foo_3f
   %div = fdiv arcp float %a, %D
   %div1 = fdiv arcp float %b, %D
   %div2 = fdiv arcp float %c, %D
@@ -36,22 +28,14 @@ define void @three_fdiv_float(float %D, float %a, float %b, float %c) {
 }
 
 define void @three_fdiv_double(double %D, double %a, double %b, double %c) {
-; CHECK-SD-LABEL: three_fdiv_double:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fmov d4, #1.00000000
-; CHECK-SD-NEXT:    fdiv d4, d4, d0
-; CHECK-SD-NEXT:    fmul d0, d1, d4
-; CHECK-SD-NEXT:    fmul d1, d2, d4
-; CHECK-SD-NEXT:    fmul d2, d3, d4
-; CHECK-SD-NEXT:    b foo_3d
-;
-; CHECK-GI-LABEL: three_fdiv_double:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fdiv d4, d1, d0
-; CHECK-GI-NEXT:    fdiv d1, d2, d0
-; CHECK-GI-NEXT:    fdiv d2, d3, d0
-; CHECK-GI-NEXT:    fmov d0, d4
-; CHECK-GI-NEXT:    b foo_3d
+; CHECK-LABEL: three_fdiv_double:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov d4, #1.00000000
+; CHECK-NEXT:    fdiv d4, d4, d0
+; CHECK-NEXT:    fmul d0, d1, d4
+; CHECK-NEXT:    fmul d1, d2, d4
+; CHECK-NEXT:    fmul d2, d3, d4
+; CHECK-NEXT:    b foo_3d
   %div = fdiv arcp double %a, %D
   %div1 = fdiv arcp double %b, %D
   %div2 = fdiv arcp double %c, %D
@@ -60,22 +44,14 @@ define void @three_fdiv_double(double %D, double %a, double %b, double %c) {
 }
 
 define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-SD-LABEL: three_fdiv_4xfloat:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fmov v4.4s, #1.00000000
-; CHECK-SD-NEXT:    fdiv v4.4s, v4.4s, v0.4s
-; CHECK-SD-NEXT:    fmul v0.4s, v1.4s, v4.4s
-; CHECK-SD-NEXT:    fmul v1.4s, v2.4s, v4.4s
-; CHECK-SD-NEXT:    fmul v2.4s, v3.4s, v4.4s
-; CHECK-SD-NEXT:    b foo_3_4xf
-;
-; CHECK-GI-LABEL: three_fdiv_4xfloat:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fdiv v4.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT:    fdiv v1.4s, v2.4s, v0.4s
-; CHECK-GI-NEXT:    fdiv v2.4s, v3.4s, v0.4s
-; CHECK-GI-NEXT:    mov v0.16b, v4.16b
-; CHECK-GI-NEXT:    b foo_3_4xf
+; CHECK-LABEL: three_fdiv_4xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov v4.4s, #1.00000000
+; CHECK-NEXT:    fdiv v4.4s, v4.4s, v0.4s
+; CHECK-NEXT:    fmul v0.4s, v1.4s, v4.4s
+; CHECK-NEXT:    fmul v1.4s, v2.4s, v4.4s
+; CHECK-NEXT:    fmul v2.4s, v3.4s, v4.4s
+; CHECK-NEXT:    b foo_3_4xf
   %div = fdiv arcp <4 x float> %a, %D
   %div1 = fdiv arcp <4 x float> %b, %D
   %div2 = fdiv arcp <4 x float> %c, %D
@@ -84,22 +60,14 @@ define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b,
 }
 
 define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-SD-LABEL: three_fdiv_2xdouble:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fmov v4.2d, #1.00000000
-; CHECK-SD-NEXT:    fdiv v4.2d, v4.2d, v0.2d
-; CHECK-SD-NEXT:    fmul v0.2d, v1.2d, v4.2d
-; CHECK-SD-NEXT:    fmul v1.2d, v2.2d, v4.2d
-; CHECK-SD-NEXT:    fmul v2.2d, v3.2d, v4.2d
-; CHECK-SD-NEXT:    b foo_3_2xd
-;
-; CHECK-GI-LABEL: three_fdiv_2xdouble:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fdiv v4.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT:    fdiv v1.2d, v2.2d, v0.2d
-; CHECK-GI-NEXT:    fdiv v2.2d, v3.2d, v0.2d
-; CHECK-GI-NEXT:    mov v0.16b, v4.16b
-; CHECK-GI-NEXT:    b foo_3_2xd
+; CHECK-LABEL: three_fdiv_2xdouble:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov v4.2d, #1.00000000
+; CHECK-NEXT:    fdiv v4.2d, v4.2d, v0.2d
+; CHECK-NEXT:    fmul v0.2d, v1.2d, v4.2d
+; CHECK-NEXT:    fmul v1.2d, v2.2d, v4.2d
+; CHECK-NEXT:    fmul v2.2d, v3.2d, v4.2d
+; CHECK-NEXT:    b foo_3_2xd
   %div = fdiv arcp <2 x double> %a, %D
   %div1 = fdiv arcp <2 x double> %b, %D
   %div2 = fdiv arcp <2 x double> %c, %D
@@ -135,26 +103,47 @@ define void @two_fdiv_double(double %D, double %a, double %b) {
   ret void
 }
 
-define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-SD-LABEL: splat_three_fdiv_4xfloat:
+define void @four_fdiv_multi_float(float %D, float %a, float %b, float %c) #0 {
+; CHECK-SD-LABEL: four_fdiv_multi_float:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT:    fmov v4.4s, #1.00000000
-; CHECK-SD-NEXT:    dup v0.4s, v0.s[0]
-; CHECK-SD-NEXT:    fdiv v4.4s, v4.4s, v0.4s
-; CHECK-SD-NEXT:    fmul v0.4s, v1.4s, v4.4s
-; CHECK-SD-NEXT:    fmul v1.4s, v2.4s, v4.4s
-; CHECK-SD-NEXT:    fmul v2.4s, v3.4s, v4.4s
-; CHECK-SD-NEXT:    b foo_3_4xf
+; CHECK-SD-NEXT:    fmov s4, #1.00000000
+; CHECK-SD-NEXT:    fdiv s5, s4, s0
+; CHECK-SD-NEXT:    fmul s4, s1, s5
+; CHECK-SD-NEXT:    fmul s1, s2, s5
+; CHECK-SD-NEXT:    fmul s2, s3, s5
+; CHECK-SD-NEXT:    fmul s3, s0, s5
+; CHECK-SD-NEXT:    fmov s0, s4
+; CHECK-SD-NEXT:    b foo_4f
 ;
-; CHECK-GI-LABEL: splat_three_fdiv_4xfloat:
+; CHECK-GI-LABEL: four_fdiv_multi_float:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
-; CHECK-GI-NEXT:    dup v4.4s, v0.s[0]
-; CHECK-GI-NEXT:    fdiv v0.4s, v1.4s, v4.4s
-; CHECK-GI-NEXT:    fdiv v1.4s, v2.4s, v4.4s
-; CHECK-GI-NEXT:    fdiv v2.4s, v3.4s, v4.4s
-; CHECK-GI-NEXT:    b foo_3_4xf
+; CHECK-GI-NEXT:    fmov s4, #1.00000000
+; CHECK-GI-NEXT:    fdiv s5, s4, s0
+; CHECK-GI-NEXT:    fdiv s4, s0, s0
+; CHECK-GI-NEXT:    fmul s0, s1, s5
+; CHECK-GI-NEXT:    fmul s1, s2, s5
+; CHECK-GI-NEXT:    fmul s2, s3, s5
+; CHECK-GI-NEXT:    fmov s3, s4
+; CHECK-GI-NEXT:    b foo_4f
+  %div = fdiv arcp float %a, %D
+  %div1 = fdiv arcp float %b, %D
+  %div2 = fdiv arcp float %c, %D
+  %div3 = fdiv arcp float %D, %D
+  tail call void @foo_4f(float %div, float %div1, float %div2, float %div3)
+  ret void
+}
+
+define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; CHECK-LABEL: splat_three_fdiv_4xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    fmov v4.4s, #1.00000000
+; CHECK-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-NEXT:    fdiv v4.4s, v4.4s, v0.4s
+; CHECK-NEXT:    fmul v0.4s, v1.4s, v4.4s
+; CHECK-NEXT:    fmul v1.4s, v2.4s, v4.4s
+; CHECK-NEXT:    fmul v2.4s, v3.4s, v4.4s
+; CHECK-NEXT:    b foo_3_4xf
   %D.ins = insertelement <4 x float> poison, float %D, i64 0
   %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
   %div = fdiv arcp <4 x float> %a, %splat
@@ -256,6 +245,7 @@ entry:
 }
 
 declare void @foo_3f(float, float, float)
+declare void @foo_4f(float, float, float, float)
 declare void @foo_3d(double, double, double)
 declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
 declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
index 594a3ab..be07978 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
@@ -38,10 +38,10 @@ define half @add_v2HalfH(<2 x half> %bin.rdx)  {
 ;
 ; CHECK-GI-NOFP16-LABEL: add_v2HalfH:
 ; CHECK-GI-NOFP16:       // %bb.0:
-; CHECK-GI-NOFP16-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT:    adrp x8, .LCPI1_0
 ; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
+; CHECK-GI-NOFP16-NEXT:    ldr h1, [x8, :lo12:.LCPI1_0]
 ; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[1]
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
@@ -88,10 +88,10 @@ define half @add_v3HalfH(<3 x half> %bin.rdx)  {
 ;
 ; CHECK-GI-NOFP16-LABEL: add_v3HalfH:
 ; CHECK-GI-NOFP16:       // %bb.0:
-; CHECK-GI-NOFP16-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT:    adrp x8, .LCPI2_0
 ; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
+; CHECK-GI-NOFP16-NEXT:    ldr h1, [x8, :lo12:.LCPI2_0]
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
 ; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
@@ -152,10 +152,10 @@ define half @add_HalfH(<4 x half> %bin.rdx)  {
 ;
 ; CHECK-GI-NOFP16-LABEL: add_HalfH:
 ; CHECK-GI-NOFP16:       // %bb.0:
-; CHECK-GI-NOFP16-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT:    adrp x8, .LCPI3_0
 ; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
+; CHECK-GI-NOFP16-NEXT:    ldr h1, [x8, :lo12:.LCPI3_0]
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
 ; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
@@ -250,9 +250,9 @@ define half @add_H(<8 x half> %bin.rdx)  {
 ;
 ; CHECK-GI-NOFP16-LABEL: add_H:
 ; CHECK-GI-NOFP16:       // %bb.0:
-; CHECK-GI-NOFP16-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT:    adrp x8, .LCPI4_0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
+; CHECK-GI-NOFP16-NEXT:    ldr h1, [x8, :lo12:.LCPI4_0]
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NOFP16-NEXT:    fadd s1, s1, s2
 ; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
@@ -448,9 +448,9 @@ define half @add_2H(<16 x half> %bin.rdx)  {
 ;
 ; CHECK-GI-NOFP16-LABEL: add_2H:
 ; CHECK-GI-NOFP16:       // %bb.0:
-; CHECK-GI-NOFP16-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT:    adrp x8, .LCPI7_0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT:    fmov s2, w8
+; CHECK-GI-NOFP16-NEXT:    ldr h2, [x8, :lo12:.LCPI7_0]
 ; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
 ; CHECK-GI-NOFP16-NEXT:    fadd s2, s2, s3
 ; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[1]
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index 18f463c..40925da 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -405,26 +405,23 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fadd_reduction_v4f16_in_loop:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    movi d0, #0000000000000000
 ; CHECK-GI-NOFP16-NEXT:    mov x8, xzr
-; CHECK-GI-NOFP16-NEXT:    mov w9, #0 // =0x0
 ; CHECK-GI-NOFP16-NEXT:  .LBB13_1: // %loop
 ; CHECK-GI-NOFP16-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NOFP16-NEXT:    ldr d0, [x0, x8]
-; CHECK-GI-NOFP16-NEXT:    fmov s1, w9
+; CHECK-GI-NOFP16-NEXT:    ldr d1, [x0, x8]
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NOFP16-NEXT:    add x8, x8, #8
 ; CHECK-GI-NOFP16-NEXT:    cmp w8, #56
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    faddp v1.4s, v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT:    faddp s1, v1.2s
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    faddp v0.4s, v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    faddp s0, v0.2s
-; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s1
+; CHECK-GI-NOFP16-NEXT:    fadd s0, s1, s0
 ; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT:    fmov w9, s0
 ; CHECK-GI-NOFP16-NEXT:    b.ne .LBB13_1
 ; CHECK-GI-NOFP16-NEXT:  // %bb.2: // %exit
-; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fadd_reduction_v4f16_in_loop:
@@ -521,28 +518,25 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fadd_reduction_v8f16_in_loop:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    movi d0, #0000000000000000
 ; CHECK-GI-NOFP16-NEXT:    mov x8, xzr
-; CHECK-GI-NOFP16-NEXT:    mov w9, #0 // =0x0
 ; CHECK-GI-NOFP16-NEXT:  .LBB14_1: // %loop
 ; CHECK-GI-NOFP16-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NOFP16-NEXT:    ldr q0, [x0, x8]
+; CHECK-GI-NOFP16-NEXT:    ldr q1, [x0, x8]
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NOFP16-NEXT:    add x8, x8, #8
 ; CHECK-GI-NOFP16-NEXT:    cmp w8, #56
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fadd v0.4s, v1.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fmov s1, w9
+; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT:    fadd v1.4s, v2.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT:    faddp v1.4s, v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT:    faddp s1, v1.2s
+; CHECK-GI-NOFP16-NEXT:    fcvt h1, s1
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    faddp v0.4s, v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    faddp s0, v0.2s
-; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fadd s0, s0, s1
+; CHECK-GI-NOFP16-NEXT:    fadd s0, s1, s0
 ; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT:    fmov w9, s0
 ; CHECK-GI-NOFP16-NEXT:    b.ne .LBB14_1
 ; CHECK-GI-NOFP16-NEXT:  // %bb.2: // %exit
-; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fadd_reduction_v8f16_in_loop:
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
index e1b2170..c10d6e9 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
@@ -52,10 +52,10 @@ define half @mul_HalfH(<4 x half> %bin.rdx)  {
 ;
 ; CHECK-GI-NOFP16-LABEL: mul_HalfH:
 ; CHECK-GI-NOFP16:       // %bb.0:
-; CHECK-GI-NOFP16-NEXT:    mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT:    adrp x8, .LCPI1_0
 ; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
+; CHECK-GI-NOFP16-NEXT:    ldr h1, [x8, :lo12:.LCPI1_0]
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
 ; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
@@ -144,9 +144,9 @@ define half @mul_H(<8 x half> %bin.rdx)  {
 ;
 ; CHECK-GI-NOFP16-LABEL: mul_H:
 ; CHECK-GI-NOFP16:       // %bb.0:
-; CHECK-GI-NOFP16-NEXT:    mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT:    adrp x8, .LCPI2_0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
+; CHECK-GI-NOFP16-NEXT:    ldr h1, [x8, :lo12:.LCPI2_0]
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NOFP16-NEXT:    fmul s1, s1, s2
 ; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
@@ -321,9 +321,9 @@ define half @mul_2H(<16 x half> %bin.rdx)  {
 ;
 ; CHECK-GI-NOFP16-LABEL: mul_2H:
 ; CHECK-GI-NOFP16:       // %bb.0:
-; CHECK-GI-NOFP16-NEXT:    mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT:    adrp x8, .LCPI5_0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT:    fmov s2, w8
+; CHECK-GI-NOFP16-NEXT:    ldr h2, [x8, :lo12:.LCPI5_0]
 ; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
 ; CHECK-GI-NOFP16-NEXT:    fmul s2, s2, s3
 ; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[1]
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index 2d7ef2c..98fbbe1 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -169,6 +169,6 @@ attributes #1 = { nounwind }
 
 ;.
 ; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index 664dfa2..2ad6e68 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -1,103 +1,166 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 4
 ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
 
+; Shrink result attribute list by preventing use of most attributes.
+define internal void @use_most() {
+; CHECK-LABEL: define internal void @use_most(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [256 x i8], align 1, addrspace(5)
+; CHECK-NEXT:    [[ALLOCA_CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
+; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.amdgcn.cluster.id.x()
+; CHECK-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.cluster.id.y()
+; CHECK-NEXT:    [[TMP12:%.*]] = call i32 @llvm.amdgcn.cluster.id.z()
+; CHECK-NEXT:    [[TMP7:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+; CHECK-NEXT:    [[TMP8:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.amdgcn.dispatch.id()
+; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT:    [[IMPLICIT_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p4.i64(ptr [[ALLOCA_CAST]], ptr addrspace(4) [[IMPLICIT_ARG_PTR]], i64 256, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca [256 x i8], addrspace(5)
+  %alloca.cast = addrspacecast ptr addrspace(5) %alloca to ptr
+  call i32 @llvm.amdgcn.workitem.id.x()
+  call i32 @llvm.amdgcn.workitem.id.y()
+  call i32 @llvm.amdgcn.workitem.id.z()
+  call i32 @llvm.amdgcn.workgroup.id.x()
+  call i32 @llvm.amdgcn.workgroup.id.y()
+  call i32 @llvm.amdgcn.workgroup.id.z()
+  call i32 @llvm.amdgcn.cluster.id.x()
+  call i32 @llvm.amdgcn.cluster.id.y()
+  call i32 @llvm.amdgcn.cluster.id.z()
+  call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+  call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+  call i64 @llvm.amdgcn.dispatch.id()
+  call i32 @llvm.amdgcn.lds.kernel.id()
+  %implicit.arg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+  call void @llvm.memcpy.p0.p4(ptr %alloca.cast, ptr addrspace(4) %implicit.arg.ptr, i64 256, i1 false)
+  ret void
+}
+
 define amdgpu_kernel void @kernel_uses_asm_virtreg() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg(
-; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "a"(i32 poison)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_def() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call i32 asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %def = call i32 asm sideeffect "; def $0", "=a"()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call i64 asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %def = call i64 asm sideeffect "; def $0", "={a[0:1]}"()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "v,a"(i32 poison, i32 poison)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_non_agpr_asm() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_non_agpr_asm(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "v"(i32 poison)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_physreg() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "{a0}"(i32 poison)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_physreg_tuple() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison)
+  call void @use_most()
   ret void
 }
 
 define void @func_uses_asm_virtreg_agpr() {
 ; CHECK-LABEL: define void @func_uses_asm_virtreg_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "a"(i32 poison)
+  call void @use_most()
   ret void
 }
 
 define void @func_uses_asm_physreg_agpr() {
 ; CHECK-LABEL: define void @func_uses_asm_physreg_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "{a0}"(i32 poison)
+  call void @use_most()
   ret void
 }
 
 define void @func_uses_asm_physreg_agpr_tuple() {
 ; CHECK-LABEL: define void @func_uses_asm_physreg_agpr_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison)
+  call void @use_most()
   ret void
 }
 
@@ -105,99 +168,119 @@ declare void @unknown()
 
 define amdgpu_kernel void @kernel_calls_extern() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
-; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void @unknown()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
-; CHECK-SAME: ) #[[ATTR2]] {
-; CHECK-NEXT:    call void @unknown() #[[ATTR6:[0-9]+]]
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void @unknown() #[[ATTR10:[0-9]+]]
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void @unknown() #0
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:    call void [[INDIRECT]]()
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void %indirect()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    call void [[INDIRECT]]() #[[ATTR6]]
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    call void [[INDIRECT]]() #[[ATTR10]]
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void %indirect() #0
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_transitively_uses_agpr_asm(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void @func_uses_asm_physreg_agpr()
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void @func_uses_asm_physreg_agpr()
+  call void @use_most()
   ret void
 }
 
 define void @empty() {
 ; CHECK-LABEL: define void @empty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
+  call void @use_most()
   ret void
 }
 
 define void @also_empty() {
 ; CHECK-LABEL: define void @also_empty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_calls_empty() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_empty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @empty()
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void @empty()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void @empty()
 ; CHECK-NEXT:    call void @func_uses_asm_physreg_agpr()
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void @empty()
   call void @func_uses_asm_physreg_agpr()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_calls_generic_intrinsic(ptr %ptr0, ptr %ptr1, i64 %size) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_generic_intrinsic(
-; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[PTR0]], ptr [[PTR1]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.memcpy.p0.p0.i64(ptr %ptr0, ptr %ptr1, i64 %size, i1 false)
+  call void @use_most()
   ret void
 }
 
@@ -205,31 +288,35 @@ declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>
 
 define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(ptr addrspace(1) %out, float %a, float %b, <32 x float> %c) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(
-; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]], <32 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]], <32 x float> [[C:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[RESULT:%.*]] = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float [[A]], float [[B]], <32 x float> [[C]], i32 0, i32 0, i32 0)
 ; CHECK-NEXT:    store <32 x float> [[RESULT]], ptr addrspace(1) [[OUT]], align 128
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %result = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float %b, <32 x float> %c, i32 0, i32 0, i32 0)
   store <32 x float> %result, ptr addrspace(1) %out
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_workitem_id_x(
-; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    store i32 [[RESULT]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %result = call i32 @llvm.amdgcn.workitem.id.x()
   store i32 %result, ptr addrspace(1) %out
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
 ; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
-; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:    [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
@@ -244,21 +331,476 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
 ; CHECK:       5:
 ; CHECK-NEXT:    unreachable
 ; CHECK:       6:
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %fptr = select i1 %cond, ptr @empty, ptr @also_empty
   call void %fptr()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  %def = call {i32, i32} asm sideeffect "; def $0", "=a,=a"()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=a"()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=v"()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0", "a"(ptr poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call ptr asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  %def = call ptr asm sideeffect "; def $0", "=a"()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call <2 x ptr> asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  %def = call <2 x ptr> asm sideeffect "; def $0", "=a"()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  %def = call {i32, i32} asm sideeffect "; def $0", "={a0},={a[4:5]}"()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; clobber $0", "~{a4}"()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_tuple(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; clobber $0", "~{a[10:13]}"()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_oob(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; clobber $0", "~{a256}"()
+  call void @use_most()
   ret void
 }
 
+define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_max(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; clobber $0", "~{a255}"()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_oob(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0", "{a256}"(i32 poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  %def = call <32 x i32> asm sideeffect "; def $0", "=a"()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0", "a"(<32 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  %def = call <32 x i32> asm sideeffect "; use $0", "=a,a"(<32 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @vreg_use_exceeds_register_file() {
+; CHECK-LABEL: define amdgpu_kernel void @vreg_use_exceeds_register_file(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0", "a"(<257 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @vreg_def_exceeds_register_file() {
+; CHECK-LABEL: define amdgpu_kernel void @vreg_def_exceeds_register_file(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call <257 x i32> asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  %def = call <257 x i32> asm sideeffect "; def $0", "=a"()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @multiple() {
+; CHECK-LABEL: define amdgpu_kernel void @multiple(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call { <16 x i32>, <8 x i32>, <8 x i32> } asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  %def = call {<16 x i32>, <8 x i32>, <8 x i32>} asm sideeffect "; def $0", "=a,=a,=a,a,a,a"(<4 x i32> splat (i32 0), <8 x i32> splat (i32 1), i64 999)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @earlyclobber_0() {
+; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_0(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call <8 x i32> asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  %def = call <8 x i32> asm sideeffect "; def $0", "=&a,a"(i32 0)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @earlyclobber_1() {
+; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_1(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call { <8 x i32>, <16 x i32> } asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  %def = call { <8 x i32>, <16 x i32 > } asm sideeffect "; def $0, $1", "=&a,=&a,a,a"(i32 0, <16 x i32> splat (i32 1))
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0, $1, $2", "{a16},a,a"(i32 poison, <8 x i32> poison, <16 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <8 x i32>, <16 x i32> } asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call {i32, <8 x i32>, <16 x i32>} asm sideeffect "; def $0, $1, $2", "={a16},=a,=a"()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <16 x i32> } asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call {i32, <16 x i32>} asm sideeffect "; def $0, $1, $2", "={a16},=a,a"(<8 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @mixed_physreg_vreg_tuples_0() {
+; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_0(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0, $1", "{a[1:4]},a"(<4 x i32> poison, <4 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @mixed_physreg_vreg_tuples_1() {
+; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_1(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0, $1", "a,{a[0:3]}"(<4 x i32> poison, <4 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @physreg_raises_limit() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_raises_limit(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0, $1", "a,{a[5:8]}"(<4 x i32> poison, <4 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+; FIXME: This should require 9 AGPRs. The physreg operand occupies a[1:4], so the virtual a128 operand cannot be allocated starting at a0.
+define amdgpu_kernel void @physreg_tuple_alignment_raises_limit() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_tuple_alignment_raises_limit(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0, $1", "a,{a[1:4]}"(<4 x i32> poison, <4 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @align3_virtreg() {
+; CHECK-LABEL: define amdgpu_kernel void @align3_virtreg(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0, $1", "a,a"(<3 x i32> poison, <3 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @align3_align4_virtreg() {
+; CHECK-LABEL: define amdgpu_kernel void @align3_align4_virtreg(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0, $1", "a,a"(<3 x i32> poison, <4 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @align2_align4_virtreg() {
+; CHECK-LABEL: define amdgpu_kernel void @align2_align4_virtreg(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0, $1", "a,a"(<2 x i32> poison, <4 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_a55() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    call void @llvm.write_register.i32(metadata [[META0:![0-9]+]], i32 0)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.write_register.i64(metadata !0, i32 0)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_v55() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55(
+; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT:    call void @llvm.write_register.i32(metadata [[META1:![0-9]+]], i32 0)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.write_register.i64(metadata !1, i32 0)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57(
+; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-NEXT:    call void @llvm.write_register.i96(metadata [[META2:![0-9]+]], i96 0)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.write_register.i64(metadata !2, i96 0)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]])
+; CHECK-NEXT:    store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+  %reg = call i32 @llvm.read_register.i64(metadata !0)
+  store i32 %reg, ptr addrspace(1) %ptr
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata [[META0]])
+; CHECK-NEXT:    store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+  %reg = call i32 @llvm.read_volatile_register.i64(metadata !0)
+  store i32 %reg, ptr addrspace(1) %ptr
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata [[META3:![0-9]+]])
+; CHECK-NEXT:    store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT:    ret void
+;
+  %reg = call i128 @llvm.read_register.i64(metadata !3)
+  store i128 %reg, ptr addrspace(1) %ptr
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256(
+; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-NEXT:    call void @llvm.write_register.i32(metadata [[META4:![0-9]+]], i32 0)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.write_register.i64(metadata !4, i32 0)
+  ret void
+}
 
 attributes #0 = { "amdgpu-agpr-alloc"="0" }
+
+!0 = !{!"a55"}
+!1 = !{!"v55"}
+!2 = !{!"a[55:57]"}
+!3 = !{!"a[56:59]"}
+!4 = !{!"a256"}
+
+;.
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR8:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="0" }
+; CHECK: [[META0]] = !{!"a55"}
+; CHECK: [[META1]] = !{!"v55"}
+; CHECK: [[META2]] = !{!"a[55:57]"}
+; CHECK: [[META3]] = !{!"a[56:59]"}
+; CHECK: [[META4]] = !{!"a256"}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index fb566e5..9283bd5 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -691,29 +691,29 @@ attributes #6 = { "enqueued-block" }
 
 ;.
 ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR15:[0-9]+]] = { nounwind "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { nounwind }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "enqueued-block" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
index 484ff77..8554485 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -474,19 +474,19 @@ attributes #1 = { nounwind }
 ; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
 ;.
 ; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
 ; HSA: [[META0]] = !{i32 1, i32 3, i32 4, i32 10}
 ; HSA: [[META1]] = !{i32 1, i32 5, i32 6, i32 10}
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
index 2efe024..e2a2deb 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
@@ -294,13 +294,13 @@ attributes #1 = { nounwind }
 
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
index aaedb85..e67d7fdb 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
@@ -3,6 +3,8 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
 
 define i8 @atomic_load_monotonic_i8(ptr addrspace(3) %ptr) {
 ; CI-LABEL: atomic_load_monotonic_i8:
@@ -33,6 +35,14 @@ define i8 @atomic_load_monotonic_i8(ptr addrspace(3) %ptr) {
 ; GFX11-FAKE16-NEXT:    ds_load_u8 v0, v0
 ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i8:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_load_u8 v0, v0
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %load = load atomic i8, ptr addrspace(3) %ptr monotonic, align 1
   ret i8 %load
 }
@@ -66,6 +76,14 @@ define i8 @atomic_load_monotonic_i8_offset(ptr addrspace(3) %ptr) {
 ; GFX11-FAKE16-NEXT:    ds_load_u8 v0, v0 offset:16
 ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i8_offset:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_load_u8 v0, v0 offset:16
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %gep = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 16
   %load = load atomic i8, ptr addrspace(3) %gep monotonic, align 1
   ret i8 %load
@@ -100,6 +118,14 @@ define i16 @atomic_load_monotonic_i16(ptr addrspace(3) %ptr) {
 ; GFX11-FAKE16-NEXT:    ds_load_u16 v0, v0
 ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i16:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_load_u16 v0, v0
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %load = load atomic i16, ptr addrspace(3) %ptr monotonic, align 2
   ret i16 %load
 }
@@ -133,6 +159,14 @@ define i16 @atomic_load_monotonic_i16_offset(ptr addrspace(3) %ptr) {
 ; GFX11-FAKE16-NEXT:    ds_load_u16 v0, v0 offset:32
 ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i16_offset:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_load_u16 v0, v0 offset:32
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %gep = getelementptr inbounds i16, ptr addrspace(3) %ptr, i16 16
   %load = load atomic i16, ptr addrspace(3) %gep monotonic, align 2
   ret i16 %load
@@ -160,6 +194,14 @@ define i32 @atomic_load_monotonic_i32(ptr addrspace(3) %ptr) {
 ; GFX11-NEXT:    ds_load_b32 v0, v0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i32:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_load_b32 v0, v0
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %load = load atomic i32, ptr addrspace(3) %ptr monotonic, align 4
   ret i32 %load
 }
@@ -186,6 +228,14 @@ define i32 @atomic_load_monotonic_i32_offset(ptr addrspace(3) %ptr) {
 ; GFX11-NEXT:    ds_load_b32 v0, v0 offset:64
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i32_offset:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_load_b32 v0, v0 offset:64
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %gep = getelementptr inbounds i32, ptr addrspace(3) %ptr, i32 16
   %load = load atomic i32, ptr addrspace(3) %gep monotonic, align 4
   ret i32 %load
@@ -213,6 +263,14 @@ define i64 @atomic_load_monotonic_i64(ptr addrspace(3) %ptr) {
 ; GFX11-NEXT:    ds_load_b64 v[0:1], v0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i64:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_load_b64 v[0:1], v0
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %load = load atomic i64, ptr addrspace(3) %ptr monotonic, align 8
   ret i64 %load
 }
@@ -239,6 +297,14 @@ define i64 @atomic_load_monotonic_i64_offset(ptr addrspace(3) %ptr) {
 ; GFX11-NEXT:    ds_load_b64 v[0:1], v0 offset:128
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i64_offset:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_load_b64 v[0:1], v0 offset:128
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %gep = getelementptr inbounds i64, ptr addrspace(3) %ptr, i32 16
   %load = load atomic i64, ptr addrspace(3) %gep monotonic, align 8
   ret i64 %load
@@ -266,6 +332,14 @@ define float @atomic_load_monotonic_f32_offset(ptr addrspace(3) %ptr) {
 ; GFX11-NEXT:    ds_load_b32 v0, v0 offset:64
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_f32_offset:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_load_b32 v0, v0 offset:64
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %gep = getelementptr inbounds float, ptr addrspace(3) %ptr, i32 16
   %load = load atomic float, ptr addrspace(3) %gep monotonic, align 4
   ret float %load
@@ -293,6 +367,14 @@ define double @atomic_load_monotonic_f64_offset(ptr addrspace(3) %ptr) {
 ; GFX11-NEXT:    ds_load_b64 v[0:1], v0 offset:128
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_f64_offset:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_load_b64 v[0:1], v0 offset:128
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %gep = getelementptr inbounds double, ptr addrspace(3) %ptr, i32 16
   %load = load atomic double, ptr addrspace(3) %gep monotonic, align 8
   ret double %load
@@ -320,6 +402,14 @@ define ptr @atomic_load_monotonic_p0i8_offset(ptr addrspace(3) %ptr) {
 ; GFX11-NEXT:    ds_load_b64 v[0:1], v0 offset:128
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_p0i8_offset:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_load_b64 v[0:1], v0 offset:128
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %gep = getelementptr inbounds ptr, ptr addrspace(3) %ptr, i32 16
   %load = load atomic ptr, ptr addrspace(3) %gep monotonic, align 8
   ret ptr %load
@@ -347,6 +437,14 @@ define ptr addrspace(3) @atomic_load_monotonic_p3i8_offset(ptr addrspace(3) %ptr
 ; GFX11-NEXT:    ds_load_b32 v0, v0 offset:64
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_p3i8_offset:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_load_b32 v0, v0 offset:64
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %gep = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %ptr, i32 16
   %load = load atomic ptr addrspace(3), ptr addrspace(3) %gep monotonic, align 4
   ret ptr addrspace(3) %load
@@ -381,6 +479,14 @@ define i16 @atomic_load_monotonic_f16(ptr addrspace(3) %ptr) {
 ; GFX11-FAKE16-NEXT:    ds_load_u16 v0, v0
 ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_f16:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_load_u16 v0, v0
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %load = load atomic half, ptr addrspace(3) %ptr monotonic, align 2
   %ret = bitcast half %load to i16
   ret i16 %ret
@@ -415,6 +521,14 @@ define i16 @atomic_load_monotonic_f16_offset(ptr addrspace(3) %ptr) {
 ; GFX11-FAKE16-NEXT:    ds_load_u16 v0, v0 offset:32
 ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_f16_offset:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_load_u16 v0, v0 offset:32
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %gep = getelementptr inbounds half, ptr addrspace(3) %ptr, i32 16
   %load = load atomic half, ptr addrspace(3) %gep monotonic, align 2
   %ret = bitcast half %load to i16
@@ -450,6 +564,14 @@ define i16 @atomic_load_monotonic_bf16(ptr addrspace(3) %ptr) {
 ; GFX11-FAKE16-NEXT:    ds_load_u16 v0, v0
 ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_bf16:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_load_u16 v0, v0
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %load = load atomic bfloat, ptr addrspace(3) %ptr monotonic, align 2
   %ret = bitcast bfloat %load to i16
   ret i16 %ret
@@ -484,6 +606,14 @@ define i16 @atomic_load_monotonic_bf16_offset(ptr addrspace(3) %ptr) {
 ; GFX11-FAKE16-NEXT:    ds_load_u16 v0, v0 offset:32
 ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_bf16_offset:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_load_u16 v0, v0 offset:32
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %gep = getelementptr inbounds bfloat, ptr addrspace(3) %ptr, i32 16
   %load = load atomic bfloat, ptr addrspace(3) %gep monotonic, align 2
   %ret = bitcast bfloat %load to i16
@@ -491,3 +621,5 @@ define i16 @atomic_load_monotonic_bf16_offset(ptr addrspace(3) %ptr) {
 }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GCN: {{.*}}
+; GFX1250-FAKE16: {{.*}}
+; GFX1250-TRUE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
index c2bb4f00..31065f2 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
@@ -3,6 +3,8 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
 
 define void @atomic_store_monotonic_i8(ptr addrspace(3) %ptr, i8 %val) {
 ; CI-LABEL: atomic_store_monotonic_i8:
@@ -41,6 +43,26 @@ define void @atomic_store_monotonic_i8(ptr addrspace(3) %ptr, i8 %val) {
 ; GFX11-FAKE16-NEXT:    ds_store_b8 v0, v2
 ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_i8:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT:    ds_store_b8 v0, v1
+; GFX1250-TRUE16-NEXT:    ds_store_b8_d16_hi v0, v1
+; GFX1250-TRUE16-NEXT:    s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_i8:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT:    v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT:    ds_store_b8 v0, v1
+; GFX1250-FAKE16-NEXT:    ds_store_b8 v0, v2
+; GFX1250-FAKE16-NEXT:    s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %val1 = add i8 %val, 2
   store atomic i8 %val, ptr addrspace(3) %ptr monotonic, align 1
   store atomic i8 %val1, ptr addrspace(3) %ptr monotonic, align 1
@@ -84,6 +106,26 @@ define void @atomic_store_monotonic_offset_i8(ptr addrspace(3) %ptr, i8 %val) {
 ; GFX11-FAKE16-NEXT:    ds_store_b8 v0, v2 offset:16
 ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_offset_i8:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT:    ds_store_b8 v0, v1 offset:8
+; GFX1250-TRUE16-NEXT:    ds_store_b8_d16_hi v0, v1 offset:16
+; GFX1250-TRUE16-NEXT:    s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_offset_i8:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT:    v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT:    ds_store_b8 v0, v1 offset:8
+; GFX1250-FAKE16-NEXT:    ds_store_b8 v0, v2 offset:16
+; GFX1250-FAKE16-NEXT:    s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %val1 = add i8 %val, 2
   %gep_1 = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 8
   %gep_2 = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 16
@@ -129,6 +171,26 @@ define void @atomic_store_monotonic_i16(ptr addrspace(3) %ptr, i16 %val) {
 ; GFX11-FAKE16-NEXT:    ds_store_b16 v0, v2
 ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_i16:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT:    ds_store_b16 v0, v1
+; GFX1250-TRUE16-NEXT:    ds_store_b16_d16_hi v0, v1
+; GFX1250-TRUE16-NEXT:    s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_i16:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT:    v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT:    ds_store_b16 v0, v1
+; GFX1250-FAKE16-NEXT:    ds_store_b16 v0, v2
+; GFX1250-FAKE16-NEXT:    s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %val1 = add i16 %val, 2
   store atomic i16 %val, ptr addrspace(3) %ptr monotonic, align 2
   store atomic i16 %val1, ptr addrspace(3) %ptr monotonic, align 2
@@ -172,6 +234,26 @@ define void @atomic_store_monotonic_offset_i16(ptr addrspace(3) %ptr, i16 %val)
 ; GFX11-FAKE16-NEXT:    ds_store_b16 v0, v2 offset:32
 ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_offset_i16:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT:    ds_store_b16 v0, v1 offset:32
+; GFX1250-TRUE16-NEXT:    ds_store_b16_d16_hi v0, v1 offset:32
+; GFX1250-TRUE16-NEXT:    s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_offset_i16:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT:    v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT:    ds_store_b16 v0, v1 offset:32
+; GFX1250-FAKE16-NEXT:    ds_store_b16 v0, v2 offset:32
+; GFX1250-FAKE16-NEXT:    s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %val1 = add i16 %val, 2
   %gep = getelementptr inbounds i16, ptr addrspace(3) %ptr, i16 16
   store atomic i16 %val, ptr addrspace(3) %gep monotonic, align 2
@@ -201,6 +283,14 @@ define void @atomic_store_monotonic_i32(ptr addrspace(3) %ptr, i32 %val) {
 ; GFX11-NEXT:    ds_store_b32 v0, v1
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_store_monotonic_i32:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_store_b32 v0, v1
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   store atomic i32 %val, ptr addrspace(3) %ptr monotonic, align 4
   ret void
 }
@@ -227,6 +317,14 @@ define void @atomic_store_monotonic_offset_i32(ptr addrspace(3) %ptr, i32 %val)
 ; GFX11-NEXT:    ds_store_b32 v0, v1 offset:64
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_store_monotonic_offset_i32:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    ds_store_b32 v0, v1 offset:64
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %gep = getelementptr inbounds i32, ptr addrspace(3) %ptr, i32 16
   store atomic i32 %val, ptr addrspace(3) %gep monotonic, align 4
   ret void
@@ -254,6 +352,15 @@ define void @atomic_store_monotonic_i64(ptr addrspace(3) %ptr, i64 %val) {
 ; GFX11-NEXT:    ds_store_b64 v0, v[1:2]
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_store_monotonic_i64:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-NEXT:    ds_store_b64 v0, v[2:3]
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   store atomic i64 %val, ptr addrspace(3) %ptr monotonic, align 8
   ret void
 }
@@ -280,6 +387,15 @@ define void @atomic_store_monotonic_offset_i64(ptr addrspace(3) %ptr, i64 %val)
 ; GFX11-NEXT:    ds_store_b64 v0, v[1:2] offset:128
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_store_monotonic_offset_i64:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-NEXT:    ds_store_b64 v0, v[2:3] offset:128
+; GFX1250-NEXT:    s_wait_dscnt 0x0
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %gep = getelementptr inbounds i64, ptr addrspace(3) %ptr, i64 16
   store atomic i64 %val, ptr addrspace(3) %gep monotonic, align 8
   ret void
@@ -322,6 +438,26 @@ define void @atomic_store_monotonic_f16(ptr addrspace(3) %ptr, i16 %arg.val) {
 ; GFX11-FAKE16-NEXT:    ds_store_b16 v0, v2
 ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_f16:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT:    ds_store_b16 v0, v1
+; GFX1250-TRUE16-NEXT:    ds_store_b16_d16_hi v0, v1
+; GFX1250-TRUE16-NEXT:    s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_f16:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT:    v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT:    ds_store_b16 v0, v1
+; GFX1250-FAKE16-NEXT:    ds_store_b16 v0, v2
+; GFX1250-FAKE16-NEXT:    s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %arg.val1 = add i16 %arg.val, 2
   %val = bitcast i16 %arg.val to half
   %val1 = bitcast i16 %arg.val1 to half
@@ -367,6 +503,26 @@ define void @atomic_store_monotonic_offset_f16(ptr addrspace(3) %ptr, i16 %arg.v
 ; GFX11-FAKE16-NEXT:    ds_store_b16 v0, v2 offset:32
 ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_offset_f16:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT:    ds_store_b16 v0, v1 offset:32
+; GFX1250-TRUE16-NEXT:    ds_store_b16_d16_hi v0, v1 offset:32
+; GFX1250-TRUE16-NEXT:    s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_offset_f16:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT:    v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT:    ds_store_b16 v0, v1 offset:32
+; GFX1250-FAKE16-NEXT:    ds_store_b16 v0, v2 offset:32
+; GFX1250-FAKE16-NEXT:    s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %arg.val1 = add i16 %arg.val, 2
   %val1 = bitcast i16 %arg.val1 to half
   %val = bitcast i16 %arg.val to half
@@ -413,6 +569,26 @@ define void @atomic_store_monotonic_bf16(ptr addrspace(3) %ptr, i16 %arg.val) {
 ; GFX11-FAKE16-NEXT:    ds_store_b16 v0, v2
 ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_bf16:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT:    ds_store_b16 v0, v1
+; GFX1250-TRUE16-NEXT:    ds_store_b16_d16_hi v0, v1
+; GFX1250-TRUE16-NEXT:    s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_bf16:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT:    v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT:    ds_store_b16 v0, v1
+; GFX1250-FAKE16-NEXT:    ds_store_b16 v0, v2
+; GFX1250-FAKE16-NEXT:    s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %arg.val1 = add i16 %arg.val, 2
   %val1 = bitcast i16 %arg.val1 to bfloat
   %val = bitcast i16 %arg.val to bfloat
@@ -458,6 +634,26 @@ define void @atomic_store_monotonic_offset_bf16(ptr addrspace(3) %ptr, i16 %arg.
 ; GFX11-FAKE16-NEXT:    ds_store_b16 v0, v2 offset:32
 ; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_offset_bf16:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT:    ds_store_b16 v0, v1 offset:32
+; GFX1250-TRUE16-NEXT:    ds_store_b16_d16_hi v0, v1 offset:32
+; GFX1250-TRUE16-NEXT:    s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_offset_bf16:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT:    v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT:    ds_store_b16 v0, v1 offset:32
+; GFX1250-FAKE16-NEXT:    ds_store_b16 v0, v2 offset:32
+; GFX1250-FAKE16-NEXT:    s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %arg.val1 = add i16 %arg.val, 2
   %val1 = bitcast i16 %arg.val1 to bfloat
   %val = bitcast i16 %arg.val to bfloat
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll
index f63dd6e..c90611f 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll
@@ -147,10 +147,10 @@ define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(ptr addrspace(3) %
 
 attributes #0 = { "amdgpu-no-flat-scratch-init" }
 ;.
-; GFX9: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
 ; GFX9: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" }
 ;.
-; GFX10: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
+; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
 ; GFX10: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" }
 ;.
 ; GFX9: [[META0]] = !{i32 1, i32 5, i32 6, i32 10}
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
index 60cd252..c005695a 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
@@ -723,7 +723,7 @@ define void @also_empty() {
 
 define amdgpu_kernel void @indirect_call_known_callees(i1 %cond) {
 ; GFX9-LABEL: define amdgpu_kernel void @indirect_call_known_callees(
-; GFX9-SAME: i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
+; GFX9-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
 ; GFX9-NEXT:    [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
 ; GFX9-NEXT:    [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
 ; GFX9-NEXT:    br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]]
@@ -741,7 +741,7 @@ define amdgpu_kernel void @indirect_call_known_callees(i1 %cond) {
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define amdgpu_kernel void @indirect_call_known_callees(
-; GFX10-SAME: i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
+; GFX10-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
 ; GFX10-NEXT:    [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
 ; GFX10-NEXT:    [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
 ; GFX10-NEXT:    br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]]
@@ -767,13 +767,13 @@ declare i32 @llvm.amdgcn.workgroup.id.x()
 
 define void @use_intrinsic_workitem_id_x() {
 ; GFX9-LABEL: define void @use_intrinsic_workitem_id_x(
-; GFX9-SAME: ) #[[ATTR5:[0-9]+]] {
+; GFX9-SAME: ) #[[ATTR4:[0-9]+]] {
 ; GFX9-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; GFX9-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) null, align 4
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define void @use_intrinsic_workitem_id_x(
-; GFX10-SAME: ) #[[ATTR5:[0-9]+]] {
+; GFX10-SAME: ) #[[ATTR4:[0-9]+]] {
 ; GFX10-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; GFX10-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) null, align 4
 ; GFX10-NEXT:    ret void
@@ -803,12 +803,12 @@ define amdgpu_kernel void @use_intrinsic_workitem_id_x_cc_kernel() {
 
 define void @call_use_intrinsic_workitem_id_x() {
 ; GFX9-LABEL: define void @call_use_intrinsic_workitem_id_x(
-; GFX9-SAME: ) #[[ATTR5]] {
+; GFX9-SAME: ) #[[ATTR4]] {
 ; GFX9-NEXT:    call void @use_intrinsic_workitem_id_x()
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define void @call_use_intrinsic_workitem_id_x(
-; GFX10-SAME: ) #[[ATTR5]] {
+; GFX10-SAME: ) #[[ATTR4]] {
 ; GFX10-NEXT:    call void @use_intrinsic_workitem_id_x()
 ; GFX10-NEXT:    ret void
 ;
@@ -818,12 +818,12 @@ define void @call_use_intrinsic_workitem_id_x() {
 
 define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel() {
 ; GFX9-LABEL: define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel(
-; GFX9-SAME: ) #[[ATTR5]] {
+; GFX9-SAME: ) #[[ATTR4]] {
 ; GFX9-NEXT:    call void @use_intrinsic_workitem_id_x()
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel(
-; GFX10-SAME: ) #[[ATTR5]] {
+; GFX10-SAME: ) #[[ATTR4]] {
 ; GFX10-NEXT:    call void @use_intrinsic_workitem_id_x()
 ; GFX10-NEXT:    ret void
 ;
@@ -851,12 +851,12 @@ define amdgpu_kernel void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr)
 
 define amdgpu_kernel void @with_inline_asm() {
 ; GFX9-LABEL: define amdgpu_kernel void @with_inline_asm(
-; GFX9-SAME: ) #[[ATTR3]] {
+; GFX9-SAME: ) #[[ATTR0]] {
 ; GFX9-NEXT:    call void asm sideeffect "
 ; GFX9-NEXT:    ret void
 ;
 ; GFX10-LABEL: define amdgpu_kernel void @with_inline_asm(
-; GFX10-SAME: ) #[[ATTR3]] {
+; GFX10-SAME: ) #[[ATTR0]] {
 ; GFX10-NEXT:    call void asm sideeffect "
 ; GFX10-NEXT:    ret void
 ;
@@ -865,19 +865,17 @@ define amdgpu_kernel void @with_inline_asm() {
 }
 
 ;.
-; GFX9: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; GFX9: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; GFX9: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
 ; GFX9: attributes #[[ATTR2]] = { "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; GFX9: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; GFX9: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" }
-; GFX9: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; GFX9: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" }
+; GFX9: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
 ;.
-; GFX10: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
-; GFX10: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
+; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
+; GFX10: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
 ; GFX10: attributes #[[ATTR2]] = { "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
-; GFX10: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
-; GFX10: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" }
-; GFX10: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
+; GFX10: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" }
+; GFX10: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
 ;.
 ; GFX9: [[META0]] = !{i32 2, i32 10}
 ; GFX9: [[META1]] = !{i32 1, i32 2, i32 3, i32 10}
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index 6b5647e..4b14dc6 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -7,11 +7,9 @@
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 | FileCheck %s -check-prefixes=GFX10
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 | FileCheck %s -check-prefixes=GFX11,GFX11TRUE16
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 | FileCheck %s -check-prefixes=GFX11,GFX11FAKE16
-; xUN: llc < %s -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 | FileCheck %s -check-prefixes=GFX1250,GFX1250TRUE16
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 | FileCheck %s -check-prefixes=GFX1250,GFX1250TRUE16
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 | FileCheck %s -check-prefixes=GFX1250,GFX1250FAKE16
 
-; FIXME: real-true16 version of gfx1250 test fails
-
 define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_load_store:
 ; GCN:       ; %bb.0:
@@ -2393,15 +2391,25 @@ define void @test_store_fpimm(ptr addrspace(1) %ptr0, ptr addrspace(1) %ptr1) {
 ; GFX11FAKE16-NEXT:    global_store_b16 v[2:3], v5, off
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: test_store_fpimm:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_mov_b32_e32 v4, 0x3f80
-; GFX1250-NEXT:    v_mov_b32_e32 v5, 0x4228
-; GFX1250-NEXT:    global_store_b16 v[0:1], v4, off
-; GFX1250-NEXT:    global_store_b16 v[2:3], v5, off
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: test_store_fpimm:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v4.l, 0x3f80
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v4.h, 0x4228
+; GFX1250TRUE16-NEXT:    global_store_b16 v[0:1], v4, off
+; GFX1250TRUE16-NEXT:    global_store_d16_hi_b16 v[2:3], v4, off
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: test_store_fpimm:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_mov_b32_e32 v4, 0x3f80
+; GFX1250FAKE16-NEXT:    v_mov_b32_e32 v5, 0x4228
+; GFX1250FAKE16-NEXT:    global_store_b16 v[0:1], v4, off
+; GFX1250FAKE16-NEXT:    global_store_b16 v[2:3], v5, off
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   store bfloat 1.0, ptr addrspace(1) %ptr0
   store bfloat 42.0, ptr addrspace(1) %ptr1
   ret void
@@ -3796,13 +3804,21 @@ define amdgpu_gfx void @test_inreg_arg_store(bfloat inreg %in, ptr addrspace(1)
 ; GFX11FAKE16-NEXT:    global_store_b16 v[0:1], v2, off
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: test_inreg_arg_store:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_mov_b32_e32 v2, s4
-; GFX1250-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: test_inreg_arg_store:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, s4
+; GFX1250TRUE16-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: test_inreg_arg_store:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_mov_b32_e32 v2, s4
+; GFX1250FAKE16-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   store bfloat %in, ptr addrspace(1) %out
   ret void
 }
@@ -3866,12 +3882,20 @@ define bfloat @test_byval(ptr addrspace(5) byval(bfloat) %bv, bfloat %val) {
 ; GFX11FAKE16-NEXT:    scratch_store_b16 off, v0, s32
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: test_byval:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    scratch_store_b16 off, v0, s32
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: test_byval:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v0.l
+; GFX1250TRUE16-NEXT:    scratch_store_b16 off, v1, s32
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: test_byval:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    scratch_store_b16 off, v0, s32
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   store bfloat %val, ptr addrspace(5) %bv
   %retval = load bfloat, ptr addrspace(5) %bv
   ret bfloat %retval
@@ -6708,27 +6732,50 @@ define { <32 x i32>, bfloat } @test_overflow_stack(bfloat %a, <32 x i32> %b) {
 ; GFX11FAKE16-NEXT:    scratch_store_b16 v0, v1, off offset:128
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: test_overflow_stack:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    s_clause 0x2
-; GFX1250-NEXT:    scratch_load_b32 v33, off, s32 offset:8
-; GFX1250-NEXT:    scratch_load_b32 v32, off, s32 offset:4
-; GFX1250-NEXT:    scratch_load_b32 v31, off, s32
-; GFX1250-NEXT:    s_clause 0x5
-; GFX1250-NEXT:    scratch_store_b128 v0, v[22:25], off offset:80
-; GFX1250-NEXT:    scratch_store_b128 v0, v[18:21], off offset:64
-; GFX1250-NEXT:    scratch_store_b128 v0, v[14:17], off offset:48
-; GFX1250-NEXT:    scratch_store_b128 v0, v[10:13], off offset:32
-; GFX1250-NEXT:    scratch_store_b128 v0, v[6:9], off offset:16
-; GFX1250-NEXT:    scratch_store_b128 v0, v[2:5], off
-; GFX1250-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-NEXT:    s_clause 0x2
-; GFX1250-NEXT:    scratch_store_b128 v0, v[30:33], off offset:112
-; GFX1250-NEXT:    scratch_store_b128 v0, v[26:29], off offset:96
-; GFX1250-NEXT:    scratch_store_b16 v0, v1, off offset:128
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: test_overflow_stack:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    s_clause 0x2
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v33, off, s32 offset:8
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v32, off, s32 offset:4
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v31, off, s32
+; GFX1250TRUE16-NEXT:    s_clause 0x3
+; GFX1250TRUE16-NEXT:    scratch_store_b128 v0, v[22:25], off offset:80
+; GFX1250TRUE16-NEXT:    scratch_store_b128 v0, v[18:21], off offset:64
+; GFX1250TRUE16-NEXT:    scratch_store_b128 v0, v[14:17], off offset:48
+; GFX1250TRUE16-NEXT:    scratch_store_b128 v0, v[10:13], off offset:32
+; GFX1250TRUE16-NEXT:    s_clause 0x1
+; GFX1250TRUE16-NEXT:    scratch_store_b128 v0, v[6:9], off offset:16
+; GFX1250TRUE16-NEXT:    scratch_store_b128 v0, v[2:5], off
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250TRUE16-NEXT:    s_clause 0x2
+; GFX1250TRUE16-NEXT:    scratch_store_b128 v0, v[30:33], off offset:112
+; GFX1250TRUE16-NEXT:    scratch_store_b128 v0, v[26:29], off offset:96
+; GFX1250TRUE16-NEXT:    scratch_store_b16 v0, v1, off offset:128
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: test_overflow_stack:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    s_clause 0x2
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v33, off, s32 offset:8
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v32, off, s32 offset:4
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v31, off, s32
+; GFX1250FAKE16-NEXT:    s_clause 0x5
+; GFX1250FAKE16-NEXT:    scratch_store_b128 v0, v[22:25], off offset:80
+; GFX1250FAKE16-NEXT:    scratch_store_b128 v0, v[18:21], off offset:64
+; GFX1250FAKE16-NEXT:    scratch_store_b128 v0, v[14:17], off offset:48
+; GFX1250FAKE16-NEXT:    scratch_store_b128 v0, v[10:13], off offset:32
+; GFX1250FAKE16-NEXT:    scratch_store_b128 v0, v[6:9], off offset:16
+; GFX1250FAKE16-NEXT:    scratch_store_b128 v0, v[2:5], off
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250FAKE16-NEXT:    s_clause 0x2
+; GFX1250FAKE16-NEXT:    scratch_store_b128 v0, v[30:33], off offset:112
+; GFX1250FAKE16-NEXT:    scratch_store_b128 v0, v[26:29], off offset:96
+; GFX1250FAKE16-NEXT:    scratch_store_b16 v0, v1, off offset:128
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %ins.0 = insertvalue { <32 x i32>, bfloat } poison, <32 x i32> %b, 0
   %ins.1 = insertvalue { <32 x i32>, bfloat } %ins.0 ,bfloat %a, 1
   ret { <32 x i32>, bfloat } %ins.1
@@ -10726,15 +10773,29 @@ define bfloat @v_fadd_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fadd_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_add_f32_e32 v0, v0, v1
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fadd_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_add_f32_e32 v0, v1, v2
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fadd_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fadd bfloat %a, %b
   ret bfloat %op
 }
@@ -15268,15 +15329,26 @@ define bfloat @v_fadd_bf16_fpimm_0(bfloat %arg0) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fadd_bf16_fpimm_0:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_add_f32_e32 v0, 1.0, v0
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fadd_bf16_fpimm_0:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_add_f32_e32 v0, 1.0, v1
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fadd_bf16_fpimm_0:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %add = fadd bfloat %arg0, 1.0
   ret bfloat %add
 }
@@ -15382,15 +15454,26 @@ define bfloat @v_fadd_bf16_fpimm_1(bfloat %arg0) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fadd_bf16_fpimm_1:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_add_f32_e32 v0, 0x42280000, v0
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fadd_bf16_fpimm_1:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_add_f32_e32 v0, 0x42280000, v1
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fadd_bf16_fpimm_1:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_add_f32_e32 v0, 0x42280000, v0
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %add = fadd bfloat %arg0, 42.0
   ret bfloat %add
 }
@@ -15507,15 +15590,29 @@ define bfloat @v_fsub_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fsub_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_sub_f32_e32 v0, v0, v1
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fsub_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_sub_f32_e32 v0, v1, v2
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fsub_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fsub bfloat %a, %b
   ret bfloat %op
 }
@@ -15931,21 +16028,37 @@ define <3 x bfloat> @v_fsub_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b) {
 ; GFX11FAKE16-NEXT:    v_alignbit_b32 v1, s0, v1, 16
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fsub_v3bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX1250-NEXT:    v_and_b32_e32 v4, 0xffff0000, v2
-; GFX1250-NEXT:    v_and_b32_e32 v5, 0xffff0000, v0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v2, 16, v2 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_dual_sub_f32 v4, v5, v4 :: v_dual_lshlrev_b32 v1, 16, v1
-; GFX1250-NEXT:    v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v4
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fsub_v3bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_dual_lshlrev_b32 v3, 16, v3 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1250TRUE16-NEXT:    v_and_b32_e32 v4, 0xffff0000, v2
+; GFX1250TRUE16-NEXT:    v_and_b32_e32 v5, 0xffff0000, v0
+; GFX1250TRUE16-NEXT:    v_dual_lshlrev_b32 v2, 16, v2 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_sub_f32_e32 v1, v1, v3
+; GFX1250TRUE16-NEXT:    v_dual_sub_f32 v3, v5, v4 :: v_dual_sub_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v3
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fsub_v3bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v4, 0xffff0000, v2
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v5, 0xffff0000, v0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v2, 16, v2 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_dual_sub_f32 v4, v5, v4 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1250FAKE16-NEXT:    v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v4
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fsub <3 x bfloat> %a, %b
   ret <3 x bfloat> %op
 }
@@ -16371,12 +16484,26 @@ define bfloat @v_fmul_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fmul_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_fma_mixlo_bf16 v0, v0, v1, 0 op_sel_hi:[1,1,0]
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fmul_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_mul_f32_e32 v0, v1, v2
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fmul_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_fma_mixlo_bf16 v0, v0, v1, 0 op_sel_hi:[1,1,0]
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fmul bfloat %a, %b
   ret bfloat %op
 }
@@ -21012,31 +21139,60 @@ define bfloat @v_fdiv_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fdiv_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v0, 16, v0 :: v_dual_lshlrev_b32 v1, 16, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
-; GFX1250-NEXT:    v_rcp_f32_e32 v3, v2
-; GFX1250-NEXT:    v_nop
-; GFX1250-NEXT:    s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
-; GFX1250-NEXT:    v_fmac_f32_e32 v3, v4, v3
-; GFX1250-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_mul_f32_e32 v5, v4, v3
-; GFX1250-NEXT:    v_fma_f32 v6, -v2, v5, v4
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_fmac_f32_e32 v5, v6, v3
-; GFX1250-NEXT:    v_fma_f32 v2, -v2, v5, v4
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
-; GFX1250-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fdiv_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v0.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v0.l, v2.l
+; GFX1250TRUE16-NEXT:    v_div_scale_f32 v1, null, v0, v0, v2
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250TRUE16-NEXT:    v_rcp_f32_e32 v3, v1
+; GFX1250TRUE16-NEXT:    v_nop
+; GFX1250TRUE16-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_fmac_f32_e32 v3, v4, v3
+; GFX1250TRUE16-NEXT:    v_div_scale_f32 v4, vcc_lo, v2, v0, v2
+; GFX1250TRUE16-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_fma_f32 v6, -v1, v5, v4
+; GFX1250TRUE16-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_fma_f32 v1, -v1, v5, v4
+; GFX1250TRUE16-NEXT:    v_div_fmas_f32 v1, v1, v3, v5
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_div_fixup_f32 v0, v1, v0, v2
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fdiv_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v0, 16, v0 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
+; GFX1250FAKE16-NEXT:    v_rcp_f32_e32 v3, v2
+; GFX1250FAKE16-NEXT:    v_nop
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
+; GFX1250FAKE16-NEXT:    v_fmac_f32_e32 v3, v4, v3
+; GFX1250FAKE16-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_mul_f32_e32 v5, v4, v3
+; GFX1250FAKE16-NEXT:    v_fma_f32 v6, -v2, v5, v4
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_fmac_f32_e32 v5, v6, v3
+; GFX1250FAKE16-NEXT:    v_fma_f32 v2, -v2, v5, v4
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
+; GFX1250FAKE16-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fdiv bfloat %a, %b
   ret bfloat %op
 }
@@ -21092,12 +21248,19 @@ define bfloat @v_fabs_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fabs_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fabs_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fabs_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.fabs.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -21198,12 +21361,19 @@ define bfloat @v_fneg_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fneg_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fneg_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_xor_b16 v0.l, 0x8000, v0.l
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fneg_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fneg bfloat %a
   ret bfloat %op
 }
@@ -21317,12 +21487,19 @@ define bfloat @v_fneg_fabs_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_or_b32_e32 v0, 0x8000, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fneg_fabs_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_or_b32_e32 v0, 0x8000, v0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fneg_fabs_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_or_b16 v0.l, 0x8000, v0.l
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fneg_fabs_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_or_b32_e32 v0, 0x8000, v0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %fabs = call bfloat @llvm.fabs.bf16(bfloat %a)
   %op = fneg bfloat %fabs
   ret bfloat %op
@@ -21511,15 +21688,29 @@ define bfloat @v_minnum_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_minnum_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_min_num_f32_e32 v0, v0, v1
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_minnum_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_min_num_f32_e32 v0, v1, v2
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_minnum_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_min_num_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.minnum.bf16(bfloat %a, bfloat %b)
   ret bfloat %op
 }
@@ -26073,15 +26264,29 @@ define bfloat @v_maxnum_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_maxnum_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_max_num_f32_e32 v0, v0, v1
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_maxnum_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_max_num_f32_e32 v0, v1, v2
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_maxnum_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_max_num_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.maxnum.bf16(bfloat %a, bfloat %b)
   ret bfloat %op
 }
@@ -30764,12 +30969,19 @@ define bfloat @v_sqrt_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_sqrt_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_sqrt_bf16_e32 v0, v0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_sqrt_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_sqrt_bf16_e32 v0.l, v0.l
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_sqrt_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_sqrt_bf16_e32 v0, v0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.sqrt.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -30877,15 +31089,26 @@ define bfloat @v_ldexp_bf16_i32(bfloat %a, i32 %b) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_ldexp_bf16_i32:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_ldexp_bf16_i32:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_ldexp_f32 v0, v2, v1
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_ldexp_bf16_i32:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_ldexp_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.ldexp.bf16.i32(bfloat %a, i32 %b)
   ret bfloat %op
 }
@@ -31005,16 +31228,28 @@ define { bfloat, i16 } @v_frexp_bf16_i16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_frexp_bf16_i16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_frexp_mant_f32_e32 v0, v1
-; GFX1250-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_frexp_bf16_i16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_frexp_mant_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_frexp_bf16_i16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_frexp_mant_f32_e32 v0, v1
+; GFX1250FAKE16-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call { bfloat, i16 } @llvm.frexp.bf16.i16(bfloat %a)
   ret { bfloat, i16 } %op
 }
@@ -31254,31 +31489,58 @@ define bfloat @v_log_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_log_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1250-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
-; GFX1250-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
-; GFX1250-NEXT:    v_log_f32_e32 v0, v0
-; GFX1250-NEXT:    v_nop
-; GFX1250-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX1250-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1250-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1250-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
-; GFX1250-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_sub_f32_e32 v0, v0, v1
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_log_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v1
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 32, vcc_lo
+; GFX1250TRUE16-NEXT:    v_ldexp_f32 v0, v1, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250TRUE16-NEXT:    v_log_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT:    v_nop
+; GFX1250TRUE16-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1250TRUE16-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1250TRUE16-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_add_f32_e32 v1, v1, v2
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_log_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
+; GFX1250FAKE16-NEXT:    v_ldexp_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250FAKE16-NEXT:    v_log_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_nop
+; GFX1250FAKE16-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1250FAKE16-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1250FAKE16-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_add_f32_e32 v1, v1, v2
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.log.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -31439,12 +31701,19 @@ define bfloat @v_log2_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_log2_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_log_bf16_e32 v0, v0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_log2_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_log_bf16_e32 v0.l, v0.l
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_log2_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_log_bf16_e32 v0, v0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.log2.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -31679,31 +31948,58 @@ define bfloat @v_log10_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_log10_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1250-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
-; GFX1250-NEXT:    v_ldexp_f32 v0, v0, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
-; GFX1250-NEXT:    v_log_f32_e32 v0, v0
-; GFX1250-NEXT:    v_nop
-; GFX1250-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX1250-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1250-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_add_f32_e32 v1, v1, v2
-; GFX1250-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
-; GFX1250-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_sub_f32_e32 v0, v0, v1
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_log10_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v1
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 32, vcc_lo
+; GFX1250TRUE16-NEXT:    v_ldexp_f32 v0, v1, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250TRUE16-NEXT:    v_log_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT:    v_nop
+; GFX1250TRUE16-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1250TRUE16-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1250TRUE16-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_add_f32_e32 v1, v1, v2
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_log10_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v1, 0, 32, vcc_lo
+; GFX1250FAKE16-NEXT:    v_ldexp_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250FAKE16-NEXT:    v_log_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_nop
+; GFX1250FAKE16-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1250FAKE16-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1250FAKE16-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_add_f32_e32 v1, v1, v2
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.log10.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -31946,34 +32242,65 @@ define bfloat @v_exp_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_exp_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
-; GFX1250-NEXT:    s_mov_b32 s0, 0x3fb8aa3b
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_mul_f32_e32 v2, 0x3fb8aa3b, v1
-; GFX1250-NEXT:    v_rndne_f32_e32 v3, v2
-; GFX1250-NEXT:    v_fma_mix_f32_bf16 v4, v0, s0, -v2 op_sel_hi:[1,0,0]
-; GFX1250-NEXT:    s_mov_b32 s0, 0x32a5705f
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_sub_f32_e32 v2, v2, v3
-; GFX1250-NEXT:    v_fma_mix_f32_bf16 v0, v0, s0, v4 op_sel_hi:[1,0,0]
-; GFX1250-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0xc2ce8ed0, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_add_f32_e32 v0, v2, v0
-; GFX1250-NEXT:    v_cvt_i32_f32_e32 v2, v3
-; GFX1250-NEXT:    v_exp_f32_e32 v0, v0
-; GFX1250-NEXT:    v_nop
-; GFX1250-NEXT:    s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1250-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc_lo
-; GFX1250-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 0x42b17218, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_exp_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_mov_b32 s0, 0x3fb8aa3b
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_mul_f32_e32 v2, 0x3fb8aa3b, v1
+; GFX1250TRUE16-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0xc2ce8ed0, v1
+; GFX1250TRUE16-NEXT:    v_fma_mix_f32_bf16 v3, v0, s0, -v2 op_sel_hi:[1,0,0]
+; GFX1250TRUE16-NEXT:    v_rndne_f32_e32 v4, v2
+; GFX1250TRUE16-NEXT:    s_mov_b32 s0, 0x32a5705f
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1250TRUE16-NEXT:    v_fma_mix_f32_bf16 v0, v0, s0, v3 op_sel_hi:[1,0,0]
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_sub_f32_e32 v2, v2, v4
+; GFX1250TRUE16-NEXT:    v_add_f32_e32 v0, v2, v0
+; GFX1250TRUE16-NEXT:    v_cvt_i32_f32_e32 v2, v4
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250TRUE16-NEXT:    v_exp_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT:    v_nop
+; GFX1250TRUE16-NEXT:    v_ldexp_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc_lo
+; GFX1250TRUE16-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 0x42b17218, v1
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_exp_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
+; GFX1250FAKE16-NEXT:    s_mov_b32 s0, 0x3fb8aa3b
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_mul_f32_e32 v2, 0x3fb8aa3b, v1
+; GFX1250FAKE16-NEXT:    v_rndne_f32_e32 v3, v2
+; GFX1250FAKE16-NEXT:    v_fma_mix_f32_bf16 v4, v0, s0, -v2 op_sel_hi:[1,0,0]
+; GFX1250FAKE16-NEXT:    s_mov_b32 s0, 0x32a5705f
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_sub_f32_e32 v2, v2, v3
+; GFX1250FAKE16-NEXT:    v_fma_mix_f32_bf16 v0, v0, s0, v4 op_sel_hi:[1,0,0]
+; GFX1250FAKE16-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0xc2ce8ed0, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_add_f32_e32 v0, v2, v0
+; GFX1250FAKE16-NEXT:    v_cvt_i32_f32_e32 v2, v3
+; GFX1250FAKE16-NEXT:    v_exp_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_nop
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_ldexp_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 0x42b17218, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.exp.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -32138,12 +32465,19 @@ define bfloat @v_exp2_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_exp2_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_exp_bf16_e32 v0, v0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_exp2_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_exp_bf16_e32 v0.l, v0.l
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_exp2_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_exp_bf16_e32 v0, v0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.exp2.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -32382,34 +32716,65 @@ define bfloat @v_exp10_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_exp10_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
-; GFX1250-NEXT:    s_mov_b32 s0, 0x40549a78
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_mul_f32_e32 v2, 0x40549a78, v1
-; GFX1250-NEXT:    v_rndne_f32_e32 v3, v2
-; GFX1250-NEXT:    v_fma_mix_f32_bf16 v4, v0, s0, -v2 op_sel_hi:[1,0,0]
-; GFX1250-NEXT:    s_mov_b32 s0, 0x33979a37
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_sub_f32_e32 v2, v2, v3
-; GFX1250-NEXT:    v_fma_mix_f32_bf16 v0, v0, s0, v4 op_sel_hi:[1,0,0]
-; GFX1250-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0xc23369f4, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_add_f32_e32 v0, v2, v0
-; GFX1250-NEXT:    v_cvt_i32_f32_e32 v2, v3
-; GFX1250-NEXT:    v_exp_f32_e32 v0, v0
-; GFX1250-NEXT:    v_nop
-; GFX1250-NEXT:    s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_ldexp_f32 v0, v0, v2
-; GFX1250-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc_lo
-; GFX1250-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 0x421a209b, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_exp10_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_mov_b32 s0, 0x40549a78
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_mul_f32_e32 v2, 0x40549a78, v1
+; GFX1250TRUE16-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0xc23369f4, v1
+; GFX1250TRUE16-NEXT:    v_fma_mix_f32_bf16 v3, v0, s0, -v2 op_sel_hi:[1,0,0]
+; GFX1250TRUE16-NEXT:    v_rndne_f32_e32 v4, v2
+; GFX1250TRUE16-NEXT:    s_mov_b32 s0, 0x33979a37
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1250TRUE16-NEXT:    v_fma_mix_f32_bf16 v0, v0, s0, v3 op_sel_hi:[1,0,0]
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_sub_f32_e32 v2, v2, v4
+; GFX1250TRUE16-NEXT:    v_add_f32_e32 v0, v2, v0
+; GFX1250TRUE16-NEXT:    v_cvt_i32_f32_e32 v2, v4
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250TRUE16-NEXT:    v_exp_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT:    v_nop
+; GFX1250TRUE16-NEXT:    v_ldexp_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc_lo
+; GFX1250TRUE16-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 0x421a209b, v1
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_exp10_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
+; GFX1250FAKE16-NEXT:    s_mov_b32 s0, 0x40549a78
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_mul_f32_e32 v2, 0x40549a78, v1
+; GFX1250FAKE16-NEXT:    v_rndne_f32_e32 v3, v2
+; GFX1250FAKE16-NEXT:    v_fma_mix_f32_bf16 v4, v0, s0, -v2 op_sel_hi:[1,0,0]
+; GFX1250FAKE16-NEXT:    s_mov_b32 s0, 0x33979a37
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_sub_f32_e32 v2, v2, v3
+; GFX1250FAKE16-NEXT:    v_fma_mix_f32_bf16 v0, v0, s0, v4 op_sel_hi:[1,0,0]
+; GFX1250FAKE16-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0xc23369f4, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_add_f32_e32 v0, v2, v0
+; GFX1250FAKE16-NEXT:    v_cvt_i32_f32_e32 v2, v3
+; GFX1250FAKE16-NEXT:    v_exp_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_nop
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_ldexp_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 0x421a209b, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.exp10.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -32517,15 +32882,26 @@ define bfloat @v_ceil_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_ceil_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_ceil_f32_e32 v0, v0
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_ceil_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_ceil_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_ceil_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_ceil_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.ceil.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -32633,15 +33009,26 @@ define bfloat @v_trunc_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_trunc_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_trunc_f32_e32 v0, v0
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_trunc_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_trunc_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_trunc_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_trunc_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.trunc.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -32749,15 +33136,26 @@ define bfloat @v_rint_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_rint_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_rint_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_rndne_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_rint_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.rint.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -32865,15 +33263,26 @@ define bfloat @v_nearbyint_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_nearbyint_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_nearbyint_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_rndne_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_nearbyint_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.nearbyint.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -33031,23 +33440,42 @@ define bfloat @v_round_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_round_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_trunc_f32_e32 v1, v0
-; GFX1250-NEXT:    v_sub_f32_e32 v2, v0, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_ge_f32_e64 s0, |v2|, 0.5
-; GFX1250-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_bfi_b32 v0, 0x7fffffff, v2, v0
-; GFX1250-NEXT:    v_add_f32_e32 v0, v1, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_round_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_trunc_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT:    v_sub_f32_e32 v2, v1, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cmp_ge_f32_e64 s0, |v2|, 0.5
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_bfi_b32 v1, 0x7fffffff, v2, v1
+; GFX1250TRUE16-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_round_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_trunc_f32_e32 v1, v0
+; GFX1250FAKE16-NEXT:    v_sub_f32_e32 v2, v0, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_ge_f32_e64 s0, |v2|, 0.5
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_bfi_b32 v0, 0x7fffffff, v2, v0
+; GFX1250FAKE16-NEXT:    v_add_f32_e32 v0, v1, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.round.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -33155,15 +33583,26 @@ define bfloat @v_roundeven_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_roundeven_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_roundeven_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_rndne_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_roundeven_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.roundeven.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -33271,15 +33710,26 @@ define bfloat @v_floor_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_floor_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_floor_f32_e32 v0, v0
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_floor_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_floor_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_floor_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_floor_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.floor.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -33385,15 +33835,26 @@ define bfloat @v_canonicalize_bf16(bfloat %a) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_canonicalize_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_max_num_f32_e32 v0, v0, v0
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_canonicalize_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_max_num_f32_e32 v0, v1, v1
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_canonicalize_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_max_num_f32_e32 v0, v0, v0
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.canonicalize.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -33535,15 +33996,28 @@ define i1 @v_fcmp_oeq_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fcmp_oeq_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_eq_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_oeq_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_oeq_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_eq_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fcmp oeq bfloat %a, %b
   ret i1 %op
 }
@@ -33630,15 +34104,28 @@ define i1 @v_fcmp_ogt_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fcmp_ogt_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ogt_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ogt_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_gt_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fcmp ogt bfloat %a, %b
   ret i1 %op
 }
@@ -33725,15 +34212,28 @@ define i1 @v_fcmp_oge_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fcmp_oge_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_ge_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_oge_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_cmp_ge_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_oge_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_ge_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fcmp oge bfloat %a, %b
   ret i1 %op
 }
@@ -33820,15 +34320,28 @@ define i1 @v_fcmp_olt_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fcmp_olt_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_lt_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_olt_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_cmp_lt_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_olt_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_lt_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fcmp olt bfloat %a, %b
   ret i1 %op
 }
@@ -33915,15 +34428,28 @@ define i1 @v_fcmp_ole_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fcmp_ole_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_le_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ole_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_cmp_le_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ole_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_le_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fcmp ole bfloat %a, %b
   ret i1 %op
 }
@@ -34010,15 +34536,28 @@ define i1 @v_fcmp_one_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fcmp_one_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_lg_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_one_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_cmp_lg_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_one_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_lg_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fcmp one bfloat %a, %b
   ret i1 %op
 }
@@ -34105,15 +34644,28 @@ define i1 @v_fcmp_uno_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fcmp_uno_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_uno_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_uno_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fcmp uno bfloat %a, %b
   ret i1 %op
 }
@@ -34200,15 +34752,28 @@ define i1 @v_fcmp_ueq_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fcmp_ueq_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_nlg_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ueq_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_cmp_nlg_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ueq_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_nlg_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fcmp ueq bfloat %a, %b
   ret i1 %op
 }
@@ -34295,15 +34860,28 @@ define i1 @v_fcmp_ugt_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fcmp_ugt_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_nle_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ugt_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_cmp_nle_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ugt_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_nle_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fcmp ugt bfloat %a, %b
   ret i1 %op
 }
@@ -34390,15 +34968,28 @@ define i1 @v_fcmp_uge_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fcmp_uge_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_uge_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_uge_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fcmp uge bfloat %a, %b
   ret i1 %op
 }
@@ -34485,15 +35076,28 @@ define i1 @v_fcmp_ult_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fcmp_ult_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_nge_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ult_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_cmp_nge_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ult_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_nge_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fcmp ult bfloat %a, %b
   ret i1 %op
 }
@@ -34580,15 +35184,28 @@ define i1 @v_fcmp_ule_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fcmp_ule_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ule_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ule_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fcmp ule bfloat %a, %b
   ret i1 %op
 }
@@ -34675,15 +35292,28 @@ define i1 @v_fcmp_une_bf16(bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fcmp_une_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_neq_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_une_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    v_cmp_neq_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_une_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_neq_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fcmp une bfloat %a, %b
   ret i1 %op
 }
@@ -34790,14 +35420,24 @@ define i16 @v_fptosi_bf16_to_i16(bfloat %x) {
 ; GFX11FAKE16-NEXT:    v_cvt_i32_f32_e32 v0, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fptosi_bf16_to_i16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_bf16_to_i16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cvt_i32_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_bf16_to_i16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fptosi bfloat %x to i16
   ret i16 %op
 }
@@ -34899,18 +35539,31 @@ define <2 x i16> @v_fptosi_v2bf16_to_v2i16(<2 x bfloat> %x) {
 ; GFX11FAKE16-NEXT:    v_perm_b32 v0, v0, v1, 0x5040100
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fptosi_v2bf16_to_v2i16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
-; GFX1250-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_cvt_i32_f32_e32 v1, v1
-; GFX1250-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_perm_b32 v0, v0, v1, 0x5040100
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_v2bf16_to_v2i16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_and_b32_e32 v1, 0xffff0000, v0
+; GFX1250TRUE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX1250TRUE16-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_v2bf16_to_v2i16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX1250FAKE16-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_perm_b32 v0, v0, v1, 0x5040100
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fptosi <2 x bfloat> %x to <2 x i16>
   ret <2 x i16> %op
 }
@@ -35032,19 +35685,33 @@ define <3 x i16> @v_fptosi_v3bf16_to_v3i16(<3 x bfloat> %x) {
 ; GFX11FAKE16-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fptosi_v3bf16_to_v3i16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v2, 16, v0 :: v_dual_lshlrev_b32 v1, 16, v1
-; GFX1250-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_cvt_i32_f32_e32 v2, v2
-; GFX1250-NEXT:    v_cvt_i32_f32_e32 v1, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX1250-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_v3bf16_to_v3i16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_and_b32_e32 v2, 0xffff0000, v0
+; GFX1250TRUE16-NEXT:    v_dual_lshlrev_b32 v0, 16, v0 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_cvt_i32_f32_e32 v2, v2
+; GFX1250TRUE16-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v0.h, v2.l
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_v3bf16_to_v3i16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v2, 16, v0 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_cvt_i32_f32_e32 v2, v2
+; GFX1250FAKE16-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fptosi <3 x bfloat> %x to <3 x i16>
   ret <3 x i16> %op
 }
@@ -35198,23 +35865,41 @@ define <4 x i16> @v_fptosi_v4bf16_to_v4i16(<4 x bfloat> %x) {
 ; GFX11FAKE16-NEXT:    v_perm_b32 v1, v1, v2, 0x5040100
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fptosi_v4bf16_to_v4i16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshlrev_b32 v2, 16, v1 :: v_dual_lshlrev_b32 v3, 16, v0
-; GFX1250-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
-; GFX1250-NEXT:    v_and_b32_e32 v1, 0xffff0000, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1250-NEXT:    v_cvt_i32_f32_e32 v2, v2
-; GFX1250-NEXT:    v_cvt_i32_f32_e32 v3, v3
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1250-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX1250-NEXT:    v_cvt_i32_f32_e32 v1, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_perm_b32 v0, v0, v3, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v1, v1, v2, 0x5040100
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_v4bf16_to_v4i16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_and_b32_e32 v2, 0xffff0000, v0
+; GFX1250TRUE16-NEXT:    v_and_b32_e32 v3, 0xffff0000, v1
+; GFX1250TRUE16-NEXT:    v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_cvt_i32_f32_e32 v2, v2
+; GFX1250TRUE16-NEXT:    v_cvt_i32_f32_e32 v3, v3
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX1250TRUE16-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v0.h, v2.l
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v3.l
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_v4bf16_to_v4i16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshlrev_b32 v2, 16, v1 :: v_dual_lshlrev_b32 v3, 16, v0
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v1, 0xffff0000, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250FAKE16-NEXT:    v_cvt_i32_f32_e32 v2, v2
+; GFX1250FAKE16-NEXT:    v_cvt_i32_f32_e32 v3, v3
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250FAKE16-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_perm_b32 v0, v0, v3, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v1, v1, v2, 0x5040100
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fptosi <4 x bfloat> %x to <4 x i16>
   ret <4 x i16> %op
 }
@@ -35274,14 +35959,24 @@ define i32 @v_fptosi_bf16_to_i32(bfloat %x) {
 ; GFX11FAKE16-NEXT:    v_cvt_i32_f32_e32 v0, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fptosi_bf16_to_i32:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_bf16_to_i32:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cvt_i32_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_bf16_to_i32:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fptosi bfloat %x to i32
   ret i32 %op
 }
@@ -35729,26 +36424,48 @@ define i64 @v_fptosi_bf16_to_i64(bfloat %x) {
 ; GFX11FAKE16-NEXT:    v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fptosi_bf16_to_i64:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_trunc_f32_e32 v0, v0
-; GFX1250-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_floor_f32_e32 v1, v1
-; GFX1250-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
-; GFX1250-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
-; GFX1250-NEXT:    v_cvt_u32_f32_e32 v3, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GFX1250-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_bitop2_b32 v3, v3, v0 bitop3:0x14
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_xor_b32_e32 v2, v2, v0
-; GFX1250-NEXT:    v_sub_nc_u64_e32 v[0:1], v[2:3], v[0:1]
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_bf16_to_i64:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_trunc_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_floor_f32_e32 v1, v1
+; GFX1250TRUE16-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX1250TRUE16-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
+; GFX1250TRUE16-NEXT:    v_cvt_u32_f32_e32 v3, v1
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX1250TRUE16-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_bitop2_b32 v3, v3, v0 bitop3:0x14
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_xor_b32_e32 v2, v2, v0
+; GFX1250TRUE16-NEXT:    v_sub_nc_u64_e32 v[0:1], v[2:3], v[0:1]
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_bf16_to_i64:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_trunc_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_floor_f32_e32 v1, v1
+; GFX1250FAKE16-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX1250FAKE16-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
+; GFX1250FAKE16-NEXT:    v_cvt_u32_f32_e32 v3, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX1250FAKE16-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_bitop2_b32 v3, v3, v0 bitop3:0x14
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_xor_b32_e32 v2, v2, v0
+; GFX1250FAKE16-NEXT:    v_sub_nc_u64_e32 v[0:1], v[2:3], v[0:1]
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = fptosi bfloat %x to i64
   ret i64 %op
 }
@@ -37293,22 +38010,39 @@ define <3 x bfloat> @v_sitofp_v3i16_to_v3bf16(<3 x i16> %x) {
 ; GFX11FAKE16-NEXT:    v_alignbit_b32 v1, s0, v1, 16
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_sitofp_v3i16_to_v3bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_ashrrev_i32_e32 v2, 16, v0
-; GFX1250-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; GFX1250-NEXT:    v_bfe_i32 v1, v1, 0, 16
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_cvt_f32_i32_e32 v2, v2
-; GFX1250-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_cvt_f32_i32_e32 v1, v1
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v2
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_sitofp_v3i16_to_v3bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_bfe_i32 v1, v1, 0, 16
+; GFX1250TRUE16-NEXT:    v_ashrrev_i32_e32 v2, 16, v0
+; GFX1250TRUE16-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_cvt_f32_i32_e32 v1, v1
+; GFX1250TRUE16-NEXT:    v_cvt_f32_i32_e32 v2, v2
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_cvt_f32_i32_e32 v0, v0
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_sitofp_v3i16_to_v3bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_ashrrev_i32_e32 v2, 16, v0
+; GFX1250FAKE16-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX1250FAKE16-NEXT:    v_bfe_i32 v1, v1, 0, 16
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_cvt_f32_i32_e32 v2, v2
+; GFX1250FAKE16-NEXT:    v_cvt_f32_i32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cvt_f32_i32_e32 v1, v1
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = sitofp <3 x i16> %x to <3 x bfloat>
   ret <3 x bfloat> %op
 }
@@ -37972,17 +38706,31 @@ define <3 x bfloat> @v_sitofp_v3i32_to_v3bf16(<3 x i32> %x) {
 ; GFX11FAKE16-NEXT:    v_alignbit_b32 v1, s0, v2, 16
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_sitofp_v3i32_to_v3bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_cvt_f32_i32_e32 v1, v1
-; GFX1250-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX1250-NEXT:    v_cvt_f32_i32_e32 v2, v2
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v1
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v1, v2, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_sitofp_v3i32_to_v3bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_cvt_f32_i32_e32 v2, v2
+; GFX1250TRUE16-NEXT:    v_cvt_f32_i32_e32 v1, v1
+; GFX1250TRUE16-NEXT:    v_cvt_f32_i32_e32 v0, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v2, v2, s0
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_sitofp_v3i32_to_v3bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_cvt_f32_i32_e32 v1, v1
+; GFX1250FAKE16-NEXT:    v_cvt_f32_i32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_cvt_f32_i32_e32 v2, v2
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v1, v2, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = sitofp <3 x i32> %x to <3 x bfloat>
   ret <3 x bfloat> %op
 }
@@ -39232,52 +39980,101 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
 ; GFX11FAKE16-NEXT:    v_alignbit_b32 v1, s0, v1, 16
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_sitofp_v3i64_to_v3bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_xor_b32_e32 v8, v4, v5
-; GFX1250-NEXT:    v_xor_b32_e32 v6, v2, v3
-; GFX1250-NEXT:    v_cls_i32_e32 v10, v3
-; GFX1250-NEXT:    v_cls_i32_e32 v9, v5
-; GFX1250-NEXT:    v_cls_i32_e32 v11, v1
-; GFX1250-NEXT:    v_dual_ashrrev_i32 v8, 31, v8 :: v_dual_bitop2_b32 v7, v0, v1 bitop3:0x14
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_dual_ashrrev_i32 v6, 31, v6 :: v_dual_ashrrev_i32 v7, 31, v7
-; GFX1250-NEXT:    v_dual_add_nc_u32 v6, 32, v6 :: v_dual_add_nc_u32 v7, 32, v7
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_add_min_u32_e64 v6, v10, -1, v6
-; GFX1250-NEXT:    v_add_min_u32_e64 v7, v11, -1, v7
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_lshlrev_b64_e32 v[2:3], v6, v[2:3]
-; GFX1250-NEXT:    v_lshlrev_b64_e32 v[0:1], v7, v[0:1]
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_min_u32_e32 v2, 1, v2
-; GFX1250-NEXT:    v_add_nc_u32_e32 v8, 32, v8
-; GFX1250-NEXT:    v_min_u32_e32 v0, 1, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_or_b32_e32 v2, v3, v2
-; GFX1250-NEXT:    v_add_min_u32_e64 v8, v9, -1, v8
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
-; GFX1250-NEXT:    v_cvt_f32_i32_e32 v2, v2
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_lshlrev_b64_e32 v[4:5], v8, v[4:5]
-; GFX1250-NEXT:    v_sub_nc_u32_e32 v8, 32, v8
-; GFX1250-NEXT:    v_ldexp_f32 v2, v2, v3
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_min_u32_e32 v4, 1, v4
-; GFX1250-NEXT:    v_dual_sub_nc_u32 v4, 32, v7 :: v_dual_bitop2_b32 v1, v5, v4 bitop3:0x54
-; GFX1250-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_cvt_f32_i32_e32 v1, v1
-; GFX1250-NEXT:    v_ldexp_f32 v0, v0, v4
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_ldexp_f32 v1, v1, v8
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v2
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_sitofp_v3i64_to_v3bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_xor_b32_e32 v7, v2, v3
+; GFX1250TRUE16-NEXT:    v_xor_b32_e32 v6, v4, v5
+; GFX1250TRUE16-NEXT:    v_cls_i32_e32 v10, v3
+; GFX1250TRUE16-NEXT:    v_cls_i32_e32 v9, v5
+; GFX1250TRUE16-NEXT:    v_cls_i32_e32 v11, v1
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_dual_ashrrev_i32 v7, 31, v7 :: v_dual_ashrrev_i32 v6, 31, v6
+; GFX1250TRUE16-NEXT:    v_xor_b32_e32 v8, v0, v1
+; GFX1250TRUE16-NEXT:    v_dual_add_nc_u32 v7, 32, v7 :: v_dual_add_nc_u32 v6, 32, v6
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_ashrrev_i32_e32 v8, 31, v8
+; GFX1250TRUE16-NEXT:    v_add_min_u32_e64 v7, v10, -1, v7
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_add_min_u32_e64 v6, v9, -1, v6
+; GFX1250TRUE16-NEXT:    v_lshlrev_b64_e32 v[2:3], v7, v[2:3]
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_lshlrev_b64_e32 v[4:5], v6, v[4:5]
+; GFX1250TRUE16-NEXT:    v_min_u32_e32 v2, 1, v2
+; GFX1250TRUE16-NEXT:    v_add_nc_u32_e32 v8, 32, v8
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_min_u32_e32 v4, 1, v4
+; GFX1250TRUE16-NEXT:    v_or_b32_e32 v2, v3, v2
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_add_min_u32_e64 v8, v11, -1, v8
+; GFX1250TRUE16-NEXT:    v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v4, v5, v4 bitop3:0x54
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_cvt_f32_i32_e32 v2, v2
+; GFX1250TRUE16-NEXT:    v_lshlrev_b64_e32 v[0:1], v8, v[0:1]
+; GFX1250TRUE16-NEXT:    v_sub_nc_u32_e32 v5, 32, v8
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_min_u32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT:    v_or_b32_e32 v0, v1, v0
+; GFX1250TRUE16-NEXT:    v_cvt_f32_i32_e32 v1, v4
+; GFX1250TRUE16-NEXT:    v_sub_nc_u32_e32 v4, 32, v7
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_cvt_f32_i32_e32 v0, v0
+; GFX1250TRUE16-NEXT:    v_ldexp_f32 v1, v1, v3
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_ldexp_f32 v2, v2, v4
+; GFX1250TRUE16-NEXT:    v_ldexp_f32 v0, v0, v5
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_sitofp_v3i64_to_v3bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_xor_b32_e32 v8, v4, v5
+; GFX1250FAKE16-NEXT:    v_xor_b32_e32 v6, v2, v3
+; GFX1250FAKE16-NEXT:    v_cls_i32_e32 v10, v3
+; GFX1250FAKE16-NEXT:    v_cls_i32_e32 v9, v5
+; GFX1250FAKE16-NEXT:    v_cls_i32_e32 v11, v1
+; GFX1250FAKE16-NEXT:    v_dual_ashrrev_i32 v8, 31, v8 :: v_dual_bitop2_b32 v7, v0, v1 bitop3:0x14
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_dual_ashrrev_i32 v6, 31, v6 :: v_dual_ashrrev_i32 v7, 31, v7
+; GFX1250FAKE16-NEXT:    v_dual_add_nc_u32 v6, 32, v6 :: v_dual_add_nc_u32 v7, 32, v7
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_add_min_u32_e64 v6, v10, -1, v6
+; GFX1250FAKE16-NEXT:    v_add_min_u32_e64 v7, v11, -1, v7
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_lshlrev_b64_e32 v[2:3], v6, v[2:3]
+; GFX1250FAKE16-NEXT:    v_lshlrev_b64_e32 v[0:1], v7, v[0:1]
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_min_u32_e32 v2, 1, v2
+; GFX1250FAKE16-NEXT:    v_add_nc_u32_e32 v8, 32, v8
+; GFX1250FAKE16-NEXT:    v_min_u32_e32 v0, 1, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_or_b32_e32 v2, v3, v2
+; GFX1250FAKE16-NEXT:    v_add_min_u32_e64 v8, v9, -1, v8
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
+; GFX1250FAKE16-NEXT:    v_cvt_f32_i32_e32 v2, v2
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_lshlrev_b64_e32 v[4:5], v8, v[4:5]
+; GFX1250FAKE16-NEXT:    v_sub_nc_u32_e32 v8, 32, v8
+; GFX1250FAKE16-NEXT:    v_ldexp_f32 v2, v2, v3
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_min_u32_e32 v4, 1, v4
+; GFX1250FAKE16-NEXT:    v_dual_sub_nc_u32 v4, 32, v7 :: v_dual_bitop2_b32 v1, v5, v4 bitop3:0x54
+; GFX1250FAKE16-NEXT:    v_cvt_f32_i32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cvt_f32_i32_e32 v1, v1
+; GFX1250FAKE16-NEXT:    v_ldexp_f32 v0, v0, v4
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_ldexp_f32 v1, v1, v8
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = sitofp <3 x i64> %x to <3 x bfloat>
   ret <3 x bfloat> %op
 }
@@ -40015,15 +40812,26 @@ define bfloat @v_uitofp_i16_to_bf16(i16 %x) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_uitofp_i16_to_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_i16_to_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cvt_f32_u32_e32 v0, v1
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_i16_to_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = uitofp i16 %x to bfloat
   ret bfloat %op
 }
@@ -40167,18 +40975,32 @@ define <2 x bfloat> @v_uitofp_v2i16_to_v2bf16(<2 x i16> %x) {
 ; GFX11FAKE16-NEXT:    v_perm_b32 v0, v0, v1, 0x7060302
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_uitofp_v2i16_to_v2bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX1250-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_cvt_f32_u32_e32 v1, v1
-; GFX1250-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v1
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_v2i16_to_v2bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, 0
+; GFX1250TRUE16-NEXT:    v_and_b32_e32 v2, 0xffff, v0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v0.h
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_cvt_f32_u32_e32 v0, v2
+; GFX1250TRUE16-NEXT:    v_cvt_f32_u32_e32 v1, v1
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_v2i16_to_v2bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cvt_f32_u32_e32 v1, v1
+; GFX1250FAKE16-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = uitofp <2 x i16> %x to <2 x bfloat>
   ret <2 x bfloat> %op
 }
@@ -40373,22 +41195,41 @@ define <3 x bfloat> @v_uitofp_v3i16_to_v3bf16(<3 x i16> %x) {
 ; GFX11FAKE16-NEXT:    v_alignbit_b32 v1, s0, v1, 16
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_uitofp_v3i16_to_v3bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GFX1250-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_cvt_f32_u32_e32 v2, v2
-; GFX1250-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_cvt_f32_u32_e32 v1, v1
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v2
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_v3i16_to_v3bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX1250TRUE16-NEXT:    v_and_b32_e32 v2, 0xffff, v0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v3.h, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v3.l, v0.h
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT:    v_cvt_f32_u32_e32 v0, v1
+; GFX1250TRUE16-NEXT:    v_cvt_f32_u32_e32 v1, v2
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_cvt_f32_u32_e32 v2, v3
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v3, v0, s0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v1, v2
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v3.l
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_v3i16_to_v3bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_cvt_f32_u32_e32 v2, v2
+; GFX1250FAKE16-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cvt_f32_u32_e32 v1, v1
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = uitofp <3 x i16> %x to <3 x bfloat>
   ret <3 x bfloat> %op
 }
@@ -40626,23 +41467,43 @@ define <4 x bfloat> @v_uitofp_v4i16_to_v4bf16(<4 x i16> %x) {
 ; GFX11FAKE16-NEXT:    v_perm_b32 v1, v1, v2, 0x7060302
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_uitofp_v4i16_to_v4bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v2, 16, v1 :: v_dual_lshrrev_b32 v3, 16, v0
-; GFX1250-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1250-NEXT:    v_cvt_f32_u32_e32 v2, v2
-; GFX1250-NEXT:    v_cvt_f32_u32_e32 v3, v3
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1250-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT:    v_cvt_f32_u32_e32 v1, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v3
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v1, v1, v2
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_v4i16_to_v4bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.h, 0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, v1.h
+; GFX1250TRUE16-NEXT:    v_and_b32_e32 v3, 0xffff, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT:    v_cvt_f32_u32_e32 v4, v2
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v2.l, v0.h
+; GFX1250TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v1
+; GFX1250TRUE16-NEXT:    v_cvt_f32_u32_e32 v1, v3
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_cvt_f32_u32_e32 v2, v2
+; GFX1250TRUE16-NEXT:    v_cvt_f32_u32_e32 v3, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v1, v2
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v1, v3, v4
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_v4i16_to_v4bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v2, 16, v1 :: v_dual_lshrrev_b32 v3, 16, v0
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250FAKE16-NEXT:    v_cvt_f32_u32_e32 v2, v2
+; GFX1250FAKE16-NEXT:    v_cvt_f32_u32_e32 v3, v3
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250FAKE16-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_cvt_f32_u32_e32 v1, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v3
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v1, v1, v2
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = uitofp <4 x i16> %x to <4 x bfloat>
   ret <4 x bfloat> %op
 }
@@ -41058,17 +41919,31 @@ define <3 x bfloat> @v_uitofp_v3i32_to_v3bf16(<3 x i32> %x) {
 ; GFX11FAKE16-NEXT:    v_alignbit_b32 v1, s0, v2, 16
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_uitofp_v3i32_to_v3bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_cvt_f32_u32_e32 v1, v1
-; GFX1250-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT:    v_cvt_f32_u32_e32 v2, v2
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v1
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v1, v2, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_v3i32_to_v3bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_cvt_f32_u32_e32 v2, v2
+; GFX1250TRUE16-NEXT:    v_cvt_f32_u32_e32 v1, v1
+; GFX1250TRUE16-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v2, v2, s0
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_v3i32_to_v3bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_cvt_f32_u32_e32 v1, v1
+; GFX1250FAKE16-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    v_cvt_f32_u32_e32 v2, v2
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v1, v2, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = uitofp <3 x i32> %x to <3 x bfloat>
   ret <3 x bfloat> %op
 }
@@ -42105,44 +42980,84 @@ define <3 x bfloat> @v_uitofp_v3i64_to_v3bf16(<3 x i64> %x) {
 ; GFX11FAKE16-NEXT:    v_alignbit_b32 v1, s0, v1, 16
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_uitofp_v3i64_to_v3bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_clz_i32_u32_e32 v6, v3
-; GFX1250-NEXT:    v_clz_i32_u32_e32 v7, v1
-; GFX1250-NEXT:    v_clz_i32_u32_e32 v8, v5
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_min_u32_e32 v6, 32, v6
-; GFX1250-NEXT:    v_min_u32_e32 v7, 32, v7
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_min_u32_e32 v8, 32, v8
-; GFX1250-NEXT:    v_lshlrev_b64_e32 v[2:3], v6, v[2:3]
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_lshlrev_b64_e32 v[0:1], v7, v[0:1]
-; GFX1250-NEXT:    v_lshlrev_b64_e32 v[4:5], v8, v[4:5]
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_min_u32_e32 v2, 1, v2
-; GFX1250-NEXT:    v_min_u32_e32 v0, 1, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_min_u32_e32 v4, 1, v4
-; GFX1250-NEXT:    v_dual_sub_nc_u32 v8, 32, v8 :: v_dual_bitop2_b32 v2, v3, v2 bitop3:0x54
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
-; GFX1250-NEXT:    v_dual_sub_nc_u32 v4, 32, v7 :: v_dual_bitop2_b32 v1, v5, v4 bitop3:0x54
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_cvt_f32_u32_e32 v2, v2
-; GFX1250-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_cvt_f32_u32_e32 v1, v1
-; GFX1250-NEXT:    v_ldexp_f32 v2, v2, v3
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_ldexp_f32 v0, v0, v4
-; GFX1250-NEXT:    v_ldexp_f32 v1, v1, v8
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v2
-; GFX1250-NEXT:    v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_v3i64_to_v3bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_clz_i32_u32_e32 v6, v5
+; GFX1250TRUE16-NEXT:    v_clz_i32_u32_e32 v7, v3
+; GFX1250TRUE16-NEXT:    v_clz_i32_u32_e32 v8, v1
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_min_u32_e32 v6, 32, v6
+; GFX1250TRUE16-NEXT:    v_min_u32_e32 v7, 32, v7
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_min_u32_e32 v8, 32, v8
+; GFX1250TRUE16-NEXT:    v_lshlrev_b64_e32 v[4:5], v6, v[4:5]
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_lshlrev_b64_e32 v[2:3], v7, v[2:3]
+; GFX1250TRUE16-NEXT:    v_lshlrev_b64_e32 v[0:1], v8, v[0:1]
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_min_u32_e32 v4, 1, v4
+; GFX1250TRUE16-NEXT:    v_min_u32_e32 v2, 1, v2
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_min_u32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT:    v_or_b32_e32 v4, v5, v4
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_or_b32_e32 v2, v3, v2
+; GFX1250TRUE16-NEXT:    v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
+; GFX1250TRUE16-NEXT:    v_sub_nc_u32_e32 v5, 32, v8
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT:    v_cvt_f32_u32_e32 v1, v4
+; GFX1250TRUE16-NEXT:    v_sub_nc_u32_e32 v4, 32, v7
+; GFX1250TRUE16-NEXT:    v_cvt_f32_u32_e32 v2, v2
+; GFX1250TRUE16-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; GFX1250TRUE16-NEXT:    v_ldexp_f32 v1, v1, v3
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT:    v_ldexp_f32 v2, v2, v4
+; GFX1250TRUE16-NEXT:    v_ldexp_f32 v0, v0, v5
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_v3i64_to_v3bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_clz_i32_u32_e32 v6, v3
+; GFX1250FAKE16-NEXT:    v_clz_i32_u32_e32 v7, v1
+; GFX1250FAKE16-NEXT:    v_clz_i32_u32_e32 v8, v5
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_min_u32_e32 v6, 32, v6
+; GFX1250FAKE16-NEXT:    v_min_u32_e32 v7, 32, v7
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_min_u32_e32 v8, 32, v8
+; GFX1250FAKE16-NEXT:    v_lshlrev_b64_e32 v[2:3], v6, v[2:3]
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_lshlrev_b64_e32 v[0:1], v7, v[0:1]
+; GFX1250FAKE16-NEXT:    v_lshlrev_b64_e32 v[4:5], v8, v[4:5]
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_min_u32_e32 v2, 1, v2
+; GFX1250FAKE16-NEXT:    v_min_u32_e32 v0, 1, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_min_u32_e32 v4, 1, v4
+; GFX1250FAKE16-NEXT:    v_dual_sub_nc_u32 v8, 32, v8 :: v_dual_bitop2_b32 v2, v3, v2 bitop3:0x54
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
+; GFX1250FAKE16-NEXT:    v_dual_sub_nc_u32 v4, 32, v7 :: v_dual_bitop2_b32 v1, v5, v4 bitop3:0x54
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_cvt_f32_u32_e32 v2, v2
+; GFX1250FAKE16-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_cvt_f32_u32_e32 v1, v1
+; GFX1250FAKE16-NEXT:    v_ldexp_f32 v2, v2, v3
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_ldexp_f32 v0, v0, v4
+; GFX1250FAKE16-NEXT:    v_ldexp_f32 v1, v1, v8
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT:    v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = uitofp <3 x i64> %x to <3 x bfloat>
   ret <3 x bfloat> %op
 }
@@ -42717,15 +43632,25 @@ define bfloat @v_select_bf16(i1 %cond, bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_select_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_select_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_select_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = select i1 %cond, bfloat %a, bfloat %b
   ret bfloat %op
 }
@@ -42810,16 +43735,27 @@ define bfloat @v_select_fneg_lhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_select_fneg_lhs_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX1250-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_select_fneg_lhs_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250TRUE16-NEXT:    v_xor_b16 v0.l, 0x8000, v1.l
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_select_fneg_lhs_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX1250FAKE16-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %neg.a = fneg bfloat %a
   %op = select i1 %cond, bfloat %neg.a, bfloat %b
   ret bfloat %op
@@ -42905,16 +43841,27 @@ define bfloat @v_select_fneg_rhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
 ; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_select_fneg_rhs_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX1250-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_select_fneg_rhs_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250TRUE16-NEXT:    v_xor_b16 v0.l, 0x8000, v2.l
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_select_fneg_rhs_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX1250FAKE16-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %neg.b = fneg bfloat %b
   %op = select i1 %cond, bfloat %a, bfloat %neg.b
   ret bfloat %op
@@ -43025,18 +43972,29 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
 ; GFX11FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_select_v2bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v3, 16, v1 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v4, 16, v2 :: v_dual_cndmask_b32 v0, v2, v1, vcc_lo
-; GFX1250-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc_lo
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_select_v2bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v1.h, vcc_lo
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_select_v2bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v3, 16, v1 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v4, 16, v2 :: v_dual_cndmask_b32 v0, v2, v1, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc_lo
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = select i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b
   ret <2 x bfloat> %op
 }
@@ -43155,20 +44113,34 @@ define <2 x bfloat> @v_vselect_v2bf16(<2 x i1> %cond, <2 x bfloat> %a, <2 x bflo
 ; GFX11FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_vselect_v2bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v4, 16, v2 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v5, 16, v3 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc_lo
-; GFX1250-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_vselect_v2bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_and_b16 v0.l, 1, v0.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v0.h, 1, v1.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 1, v0.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s0, 1, v0.h
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, vcc_lo
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v2.h, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_vselect_v2bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v4, 16, v2 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v5, 16, v3 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc_lo
+; GFX1250FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = select <2 x i1> %cond, <2 x bfloat> %a, <2 x bfloat> %b
   ret <2 x bfloat> %op
 }
@@ -43256,16 +44228,26 @@ define amdgpu_ps i32 @s_select_bf16(bfloat inreg %a, bfloat inreg %b, i32 %c) {
 ; GFX11FAKE16-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11FAKE16-NEXT:    ; return to shader part epilog
 ;
-; GFX1250-LABEL: s_select_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    v_mov_b32_e32 v1, s0
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_cndmask_b32_e32 v0, s1, v1, vcc_lo
-; GFX1250-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT:    ; return to shader part epilog
+; GFX1250TRUE16-LABEL: s_select_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, s0
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v0.h, 0
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.l, s1, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX1250TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250FAKE16-LABEL: s_select_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    v_mov_b32_e32 v1, s0
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v0, s1, v1, vcc_lo
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX1250FAKE16-NEXT:    ; return to shader part epilog
   %cond = icmp eq i32 %c, 0
   %op = select i1 %cond, bfloat %a, bfloat %b
   %cast = bitcast bfloat %op to i16
@@ -43402,20 +44384,34 @@ define amdgpu_ps i32 @s_select_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
 ; GFX11FAKE16-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11FAKE16-NEXT:    ; return to shader part epilog
 ;
-; GFX1250-LABEL: s_select_v2bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_lshr_b32 s2, s0, 16
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1250-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s0
-; GFX1250-NEXT:    s_lshr_b32 s3, s1, 16
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1250-NEXT:    v_cndmask_b32_e32 v0, s3, v1, vcc_lo
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_cndmask_b32_e32 v1, s1, v2, vcc_lo
-; GFX1250-NEXT:    v_perm_b32 v0, v0, v1, 0x5040100
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT:    ; return to shader part epilog
+; GFX1250TRUE16-LABEL: s_select_v2bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_lshr_b32 s2, s0, 16
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, s2
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v0.l, s0
+; GFX1250TRUE16-NEXT:    s_lshr_b32 s0, s1, 16
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.h, s0, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.l, s1, v0.l, vcc_lo
+; GFX1250TRUE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX1250TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250FAKE16-LABEL: s_select_v2bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_lshr_b32 s2, s0, 16
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250FAKE16-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s0
+; GFX1250FAKE16-NEXT:    s_lshr_b32 s3, s1, 16
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v0, s3, v1, vcc_lo
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v1, s1, v2, vcc_lo
+; GFX1250FAKE16-NEXT:    v_perm_b32 v0, v0, v1, 0x5040100
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX1250FAKE16-NEXT:    ; return to shader part epilog
   %cond = icmp eq i32 %c, 0
   %op = select i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b
   %cast = bitcast <2 x bfloat> %op to i32
@@ -43554,21 +44550,36 @@ define amdgpu_ps i32 @s_vselect_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
 ; GFX11FAKE16-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11FAKE16-NEXT:    ; return to shader part epilog
 ;
-; GFX1250-LABEL: s_vselect_v2bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_lshr_b32 s2, s0, 16
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX1250-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s0
-; GFX1250-NEXT:    s_lshr_b32 s0, s1, 16
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1250-NEXT:    v_cndmask_b32_e32 v1, s0, v2, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_cndmask_b32_e32 v0, s1, v3, vcc_lo
-; GFX1250-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT:    ; return to shader part epilog
+; GFX1250TRUE16-LABEL: s_vselect_v2bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_lshr_b32 s3, s0, 16
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u32_e64 s2, 0, v1
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v0.l, s3
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v0.h, s0
+; GFX1250TRUE16-NEXT:    s_lshr_b32 s0, s1, 16
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v1.h, s0, v0.l, s2
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v1.l, s1, v0.h, vcc_lo
+; GFX1250TRUE16-NEXT:    v_readfirstlane_b32 s0, v1
+; GFX1250TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250FAKE16-LABEL: s_vselect_v2bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_lshr_b32 s2, s0, 16
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX1250FAKE16-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s0
+; GFX1250FAKE16-NEXT:    s_lshr_b32 s0, s1, 16
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v1, s0, v2, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v0, s1, v3, vcc_lo
+; GFX1250FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX1250FAKE16-NEXT:    ; return to shader part epilog
   %cond = icmp eq <2 x i32> %c, zeroinitializer
   %op = select <2 x i1> %cond, <2 x bfloat> %a, <2 x bfloat> %b
   %cast = bitcast <2 x bfloat> %op to i32
@@ -45557,32 +46568,55 @@ define amdgpu_ps <2 x i32> @s_vselect_v4bf16(<4 x bfloat> inreg %a, <4 x bfloat>
 ; GFX11FAKE16-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX11FAKE16-NEXT:    ; return to shader part epilog
 ;
-; GFX1250-LABEL: s_vselect_v4bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_lshr_b32 s4, s1, 16
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1250-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s1
-; GFX1250-NEXT:    s_lshr_b32 s4, s3, 16
-; GFX1250-NEXT:    s_lshr_b32 s5, s0, 16
-; GFX1250-NEXT:    v_mov_b32_e32 v6, s0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT:    v_cndmask_b32_e32 v3, s4, v4, vcc_lo
-; GFX1250-NEXT:    v_mov_b32_e32 v4, s5
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX1250-NEXT:    s_lshr_b32 s0, s2, 16
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
-; GFX1250-NEXT:    v_cndmask_b32_e32 v1, s0, v4, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1250-NEXT:    v_cndmask_b32_e32 v0, s2, v6, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT:    v_cndmask_b32_e32 v2, s3, v5, vcc_lo
-; GFX1250-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
-; GFX1250-NEXT:    v_readfirstlane_b32 s1, v1
-; GFX1250-NEXT:    ; return to shader part epilog
+; GFX1250TRUE16-LABEL: s_vselect_v4bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_lshr_b32 s7, s1, 16
+; GFX1250TRUE16-NEXT:    s_lshr_b32 s9, s0, 16
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u32_e64 s4, 0, v1
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u32_e64 s5, 0, v2
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u32_e64 s6, 0, v3
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v0.l, s7
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v0.h, s9
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.l, s0
+; GFX1250TRUE16-NEXT:    v_mov_b16_e32 v1.h, s1
+; GFX1250TRUE16-NEXT:    s_lshr_b32 s8, s3, 16
+; GFX1250TRUE16-NEXT:    s_lshr_b32 s0, s2, 16
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v2.h, s8, v0.l, s6
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.h, s0, v0.h, s4
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.l, s2, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v2.l, s3, v1.h, s5
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX1250TRUE16-NEXT:    v_readfirstlane_b32 s1, v2
+; GFX1250TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX1250FAKE16-LABEL: s_vselect_v4bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_lshr_b32 s4, s1, 16
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v3
+; GFX1250FAKE16-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s1
+; GFX1250FAKE16-NEXT:    s_lshr_b32 s4, s3, 16
+; GFX1250FAKE16-NEXT:    s_lshr_b32 s5, s0, 16
+; GFX1250FAKE16-NEXT:    v_mov_b32_e32 v6, s0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v3, s4, v4, vcc_lo
+; GFX1250FAKE16-NEXT:    v_mov_b32_e32 v4, s5
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX1250FAKE16-NEXT:    s_lshr_b32 s0, s2, 16
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v1, s0, v4, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v0, s2, v6, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v2, s3, v5, vcc_lo
+; GFX1250FAKE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
+; GFX1250FAKE16-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX1250FAKE16-NEXT:    ; return to shader part epilog
   %cond = icmp eq <4 x i32> %c, zeroinitializer
   %op = select <4 x i1> %cond, <4 x bfloat> %a, <4 x bfloat> %b
   %cast = bitcast <4 x bfloat> %op to <2 x i32>
@@ -45787,27 +46821,49 @@ define <4 x bfloat> @v_vselect_v4bf16(<4 x i1> %cond, <4 x bfloat> %a, <4 x bflo
 ; GFX11FAKE16-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_vselect_v4bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_and_b32_e32 v2, 1, v2
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v8, 16, v4 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v9, 16, v6 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX1250-NEXT:    v_dual_cndmask_b32 v2, v7, v5, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v7, 16, v7 :: v_dual_lshrrev_b32 v5, 16, v5
-; GFX1250-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1250-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1250-NEXT:    v_cndmask_b32_e32 v3, v7, v5, vcc_lo
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_vselect_v4bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_and_b16 v0.h, 1, v2.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v0.l, 1, v0.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v1.l, 1, v1.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v1.h, 1, v3.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 1, v0.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s0, 1, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s1, 1, v1.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s2, 1, v1.h
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v1.l, v7.l, v5.l, vcc_lo
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.l, v6.l, v4.l, s0
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.h, v6.h, v4.h, s1
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v1.h, v7.h, v5.h, s2
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_vselect_v4bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v2, 1, v2
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v8, 16, v4 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v9, 16, v6 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX1250FAKE16-NEXT:    v_dual_cndmask_b32 v2, v7, v5, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v7, 16, v7 :: v_dual_lshrrev_b32 v5, 16, v5
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v3, v7, v5, vcc_lo
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = select <4 x i1> %cond, <4 x bfloat> %a, <4 x bfloat> %b
   ret <4 x bfloat> %op
 }
@@ -46161,45 +47217,77 @@ define <8 x bfloat> @v_vselect_v8bf16(<8 x i1> %cond, <8 x bfloat> %a, <8 x bflo
 ; GFX11FAKE16-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_vselect_v8bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_and_b32_e32 v6, 1, v6
-; GFX1250-NEXT:    v_and_b32_e32 v4, 1, v4
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v17, 16, v14 :: v_dual_bitop2_b32 v5, 1, v5 bitop3:0x40
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v16, 16, v10 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX1250-NEXT:    v_and_b32_e32 v1, 1, v1
-; GFX1250-NEXT:    v_dual_cndmask_b32 v6, v15, v11, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
-; GFX1250-NEXT:    v_and_b32_e32 v7, 1, v7
-; GFX1250-NEXT:    v_lshrrev_b32_e32 v11, 16, v11
-; GFX1250-NEXT:    v_dual_cndmask_b32 v4, v14, v10 :: v_dual_lshrrev_b32 v15, 16, v15
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v14, 16, v12 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
-; GFX1250-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
-; GFX1250-NEXT:    v_cndmask_b32_e32 v5, v17, v16, vcc_lo
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX1250-NEXT:    v_cndmask_b32_e32 v2, v13, v9, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
-; GFX1250-NEXT:    v_dual_cndmask_b32 v0, v12, v8 :: v_dual_lshrrev_b32 v13, 16, v13
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1250-NEXT:    v_cndmask_b32_e32 v1, v14, v10, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX1250-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT:    v_cndmask_b32_e32 v3, v13, v9, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
-; GFX1250-NEXT:    v_cndmask_b32_e32 v7, v15, v11, vcc_lo
-; GFX1250-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX1250-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_vselect_v8bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_and_b16 v0.h, 1, v1.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v0.l, 1, v0.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v1.l, 1, v3.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v1.h, 1, v5.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 1, v0.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s0, 1, v0.l
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s1, 1, v1.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v0.l, 1, v6.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v0.h, 1, v4.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v1.l, 1, v2.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v2.l, 1, v7.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s5, 1, v1.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s2, 1, v0.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s3, 1, v0.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s4, 1, v1.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s6, 1, v2.l
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.l, v12.l, v8.l, s0
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v3.l, v15.l, v11.l, s2
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v2.l, v14.l, v10.l, s3
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v1.l, v13.l, v9.l, s4
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.h, v12.h, v8.h, vcc_lo
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v1.h, v13.h, v9.h, s1
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v2.h, v14.h, v10.h, s5
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v3.h, v15.h, v11.h, s6
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_vselect_v8bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v6, 1, v6
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v4, 1, v4
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v17, 16, v14 :: v_dual_bitop2_b32 v5, 1, v5 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v16, 16, v10 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v1, 1, v1
+; GFX1250FAKE16-NEXT:    v_dual_cndmask_b32 v6, v15, v11, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v7, 1, v7
+; GFX1250FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v11
+; GFX1250FAKE16-NEXT:    v_dual_cndmask_b32 v4, v14, v10 :: v_dual_lshrrev_b32 v15, 16, v15
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v14, 16, v12 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v5, v17, v16, vcc_lo
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v2, v13, v9, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
+; GFX1250FAKE16-NEXT:    v_dual_cndmask_b32 v0, v12, v8 :: v_dual_lshrrev_b32 v13, 16, v13
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v1, v14, v10, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v3, v13, v9, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v7, v15, v11, vcc_lo
+; GFX1250FAKE16-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = select <8 x i1> %cond, <8 x bfloat> %a, <8 x bfloat> %b
   ret <8 x bfloat> %op
 }
@@ -46939,73 +48027,129 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x
 ; GFX11FAKE16-NEXT:    v_perm_b32 v7, v15, v14, 0x5040100
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_vselect_v16bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    scratch_load_b32 v31, off, s32
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v52, 16, v25 :: v_dual_bitop2_b32 v12, 1, v12 bitop3:0x40
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v53, 16, v16 :: v_dual_bitop2_b32 v13, 1, v13 bitop3:0x40
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v33, 16, v22 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v34, 16, v30 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v51, 16, v17 :: v_dual_bitop2_b32 v10, 1, v10 bitop3:0x40
-; GFX1250-NEXT:    v_cndmask_b32_e32 v12, v30, v22, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v13
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v50, 16, v26 :: v_dual_bitop2_b32 v11, 1, v11 bitop3:0x40
-; GFX1250-NEXT:    v_and_b32_e32 v14, 1, v14
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v35, 16, v21 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
-; GFX1250-NEXT:    v_cndmask_b32_e32 v13, v34, v33, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v10
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v36, 16, v29 :: v_dual_bitop2_b32 v4, 1, v4 bitop3:0x40
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v49, 16, v18 :: v_dual_bitop2_b32 v8, 1, v8 bitop3:0x40
-; GFX1250-NEXT:    v_cndmask_b32_e32 v10, v29, v21, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v11
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v37, 16, v20 :: v_dual_bitop2_b32 v5, 1, v5 bitop3:0x40
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v38, 16, v28 :: v_dual_bitop2_b32 v7, 1, v7 bitop3:0x40
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v48, 16, v27 :: v_dual_bitop2_b32 v9, 1, v9 bitop3:0x40
-; GFX1250-NEXT:    v_cndmask_b32_e32 v11, v36, v35, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v39, 16, v19 :: v_dual_bitop2_b32 v6, 1, v6 bitop3:0x40
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v32, 16, v23 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
-; GFX1250-NEXT:    v_cndmask_b32_e32 v8, v28, v20, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v54, 16, v24 :: v_dual_bitop2_b32 v15, 1, v15 bitop3:0x40
-; GFX1250-NEXT:    v_cndmask_b32_e32 v9, v38, v37, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX1250-NEXT:    v_cndmask_b32_e32 v6, v27, v19, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
-; GFX1250-NEXT:    v_cndmask_b32_e32 v4, v26, v18, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX1250-NEXT:    v_cndmask_b32_e32 v2, v25, v17, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1250-NEXT:    v_cndmask_b32_e32 v3, v52, v51, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT:    v_cndmask_b32_e32 v0, v24, v16, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1250-NEXT:    v_cndmask_b32_e32 v1, v54, v53, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX1250-NEXT:    v_cndmask_b32_e32 v5, v50, v49, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX1250-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v4, v9, v8, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v5, v11, v10, 0x5040100
-; GFX1250-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-NEXT:    v_lshrrev_b32_e32 v3, 16, v31
-; GFX1250-NEXT:    v_cndmask_b32_e32 v7, v48, v39, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v14
-; GFX1250-NEXT:    v_cndmask_b32_e32 v14, v31, v23, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
-; GFX1250-NEXT:    v_cndmask_b32_e32 v15, v3, v32, vcc_lo
-; GFX1250-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v6, v13, v12, 0x5040100
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX1250-NEXT:    v_perm_b32 v7, v15, v14, 0x5040100
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_vselect_v16bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v31, off, s32
+; GFX1250TRUE16-NEXT:    v_and_b16 v0.h, 1, v1.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v0.l, 1, v0.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v1.l, 1, v3.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v1.h, 1, v2.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v2.l, 1, v5.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v2.h, 1, v4.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v3.l, 1, v7.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v3.h, 1, v6.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v4.l, 1, v9.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v4.h, 1, v8.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v5.l, 1, v11.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v5.h, 1, v10.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v6.l, 1, v13.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v6.h, 1, v12.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v7.l, 1, v15.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v7.h, 1, v14.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 1, v0.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s0, 1, v0.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s1, 1, v1.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s2, 1, v1.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s3, 1, v2.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s4, 1, v2.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s5, 1, v3.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s6, 1, v3.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s7, 1, v4.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s8, 1, v4.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s9, 1, v5.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s10, 1, v6.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s11, 1, v6.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s12, 1, v5.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s13, 1, v7.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s14, 1, v7.h
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v6.l, v30.l, v22.l, s10
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v6.h, v30.h, v22.h, s11
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v5.l, v29.l, v21.l, s12
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v5.h, v29.h, v21.h, s9
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v4.l, v28.l, v20.l, s8
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v4.h, v28.h, v20.h, s7
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v3.l, v27.l, v19.l, s6
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v3.h, v27.h, v19.h, s5
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v2.l, v26.l, v18.l, s4
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v1.l, v25.l, v17.l, s2
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.l, v24.l, v16.l, s0
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.h, v24.h, v16.h, vcc_lo
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v1.h, v25.h, v17.h, s1
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v2.h, v26.h, v18.h, s3
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v7.l, v31.l, v23.l, s14
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v7.h, v31.h, v23.h, s13
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_vselect_v16bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v31, off, s32
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v52, 16, v25 :: v_dual_bitop2_b32 v12, 1, v12 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v53, 16, v16 :: v_dual_bitop2_b32 v13, 1, v13 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v33, 16, v22 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v34, 16, v30 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v51, 16, v17 :: v_dual_bitop2_b32 v10, 1, v10 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v12, v30, v22, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v13
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v50, 16, v26 :: v_dual_bitop2_b32 v11, 1, v11 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v14, 1, v14
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v35, 16, v21 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v13, v34, v33, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v10
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v36, 16, v29 :: v_dual_bitop2_b32 v4, 1, v4 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v49, 16, v18 :: v_dual_bitop2_b32 v8, 1, v8 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v10, v29, v21, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v11
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v37, 16, v20 :: v_dual_bitop2_b32 v5, 1, v5 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v38, 16, v28 :: v_dual_bitop2_b32 v7, 1, v7 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v48, 16, v27 :: v_dual_bitop2_b32 v9, 1, v9 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v11, v36, v35, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v39, 16, v19 :: v_dual_bitop2_b32 v6, 1, v6 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v32, 16, v23 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v8, v28, v20, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v54, 16, v24 :: v_dual_bitop2_b32 v15, 1, v15 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v9, v38, v37, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v6, v27, v19, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v4, v26, v18, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v2, v25, v17, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v3, v52, v51, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v0, v24, v16, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v1, v54, v53, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v5, v50, v49, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX1250FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v4, v9, v8, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v5, v11, v10, 0x5040100
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v31
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v7, v48, v39, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v14
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v14, v31, v23, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v15, v3, v32, vcc_lo
+; GFX1250FAKE16-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v6, v13, v12, 0x5040100
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX1250FAKE16-NEXT:    v_perm_b32 v7, v15, v14, 0x5040100
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = select <16 x i1> %cond, <16 x bfloat> %a, <16 x bfloat> %b
   ret <16 x bfloat> %op
 }
@@ -48861,177 +50005,330 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
 ; GFX11FAKE16-NEXT:    v_perm_b32 v15, v31, v30, 0x5040100
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_vselect_v32bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    s_clause 0x1b
-; GFX1250-NEXT:    scratch_load_b32 v31, off, s32 offset:60
-; GFX1250-NEXT:    scratch_load_b32 v32, off, s32 offset:124
-; GFX1250-NEXT:    scratch_load_u16 v33, off, s32
-; GFX1250-NEXT:    scratch_load_b32 v34, off, s32 offset:128
-; GFX1250-NEXT:    scratch_load_b32 v35, off, s32 offset:64
-; GFX1250-NEXT:    scratch_load_b32 v36, off, s32 offset:120
-; GFX1250-NEXT:    scratch_load_b32 v37, off, s32 offset:56
-; GFX1250-NEXT:    scratch_load_b32 v38, off, s32 offset:116
-; GFX1250-NEXT:    scratch_load_b32 v39, off, s32 offset:52
-; GFX1250-NEXT:    scratch_load_b32 v48, off, s32 offset:112
-; GFX1250-NEXT:    scratch_load_b32 v49, off, s32 offset:48
-; GFX1250-NEXT:    scratch_load_b32 v50, off, s32 offset:108
-; GFX1250-NEXT:    scratch_load_b32 v51, off, s32 offset:44
-; GFX1250-NEXT:    scratch_load_b32 v52, off, s32 offset:104
-; GFX1250-NEXT:    scratch_load_b32 v53, off, s32 offset:40
-; GFX1250-NEXT:    scratch_load_b32 v54, off, s32 offset:100
-; GFX1250-NEXT:    scratch_load_b32 v55, off, s32 offset:36
-; GFX1250-NEXT:    scratch_load_b32 v64, off, s32 offset:76
-; GFX1250-NEXT:    scratch_load_b32 v65, off, s32 offset:12
-; GFX1250-NEXT:    scratch_load_b32 v66, off, s32 offset:96
-; GFX1250-NEXT:    scratch_load_b32 v67, off, s32 offset:32
-; GFX1250-NEXT:    scratch_load_b32 v68, off, s32 offset:80
-; GFX1250-NEXT:    scratch_load_b32 v69, off, s32 offset:84
-; GFX1250-NEXT:    scratch_load_b32 v70, off, s32 offset:92
-; GFX1250-NEXT:    scratch_load_b32 v71, off, s32 offset:28
-; GFX1250-NEXT:    scratch_load_b32 v80, off, s32 offset:20
-; GFX1250-NEXT:    scratch_load_b32 v81, off, s32 offset:88
-; GFX1250-NEXT:    scratch_load_b32 v82, off, s32 offset:24
-; GFX1250-NEXT:    v_and_b32_e32 v30, 1, v30
-; GFX1250-NEXT:    v_and_b32_e32 v29, 1, v29
-; GFX1250-NEXT:    v_and_b32_e32 v26, 1, v26
-; GFX1250-NEXT:    v_and_b32_e32 v24, 1, v24
-; GFX1250-NEXT:    v_and_b32_e32 v22, 1, v22
-; GFX1250-NEXT:    v_and_b32_e32 v20, 1, v20
-; GFX1250-NEXT:    v_and_b32_e32 v18, 1, v18
-; GFX1250-NEXT:    v_and_b32_e32 v16, 1, v16
-; GFX1250-NEXT:    v_and_b32_e32 v10, 1, v10
-; GFX1250-NEXT:    v_and_b32_e32 v6, 1, v6
-; GFX1250-NEXT:    v_and_b32_e32 v4, 1, v4
-; GFX1250-NEXT:    v_and_b32_e32 v1, 1, v1
-; GFX1250-NEXT:    v_and_b32_e32 v3, 1, v3
-; GFX1250-NEXT:    v_and_b32_e32 v5, 1, v5
-; GFX1250-NEXT:    v_and_b32_e32 v23, 1, v23
-; GFX1250-NEXT:    v_and_b32_e32 v9, 1, v9
-; GFX1250-NEXT:    v_and_b32_e32 v13, 1, v13
-; GFX1250-NEXT:    v_and_b32_e32 v15, 1, v15
-; GFX1250-NEXT:    v_and_b32_e32 v21, 1, v21
-; GFX1250-NEXT:    v_and_b32_e32 v11, 1, v11
-; GFX1250-NEXT:    v_and_b32_e32 v19, 1, v19
-; GFX1250-NEXT:    s_wait_loadcnt 0x1a
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v83, 16, v32 :: v_dual_bitop2_b32 v17, 1, v17 bitop3:0x40
-; GFX1250-NEXT:    v_cmp_eq_u32_e64 s1, 1, v30
-; GFX1250-NEXT:    v_and_b32_e32 v28, 1, v28
-; GFX1250-NEXT:    s_wait_loadcnt 0x17
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT:    v_dual_cndmask_b32 v30, v34, v35, s1 :: v_dual_bitop2_b32 v33, 1, v33 bitop3:0x40
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v28
-; GFX1250-NEXT:    v_lshrrev_b32_e32 v28, 16, v31
-; GFX1250-NEXT:    v_cmp_eq_u32_e64 s0, 1, v29
-; GFX1250-NEXT:    scratch_load_b32 v29, off, s32 offset:16
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v35, 16, v35 :: v_dual_lshrrev_b32 v34, 16, v34
-; GFX1250-NEXT:    v_cndmask_b32_e32 v31, v32, v31, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v33
-; GFX1250-NEXT:    scratch_load_b32 v32, off, s32 offset:72
-; GFX1250-NEXT:    v_cndmask_b32_e64 v28, v83, v28, s0
-; GFX1250-NEXT:    scratch_load_b32 v83, off, s32 offset:4
-; GFX1250-NEXT:    v_cndmask_b32_e32 v34, v34, v35, vcc_lo
-; GFX1250-NEXT:    s_clause 0x1
-; GFX1250-NEXT:    scratch_load_b32 v35, off, s32 offset:68
-; GFX1250-NEXT:    scratch_load_b32 v33, off, s32 offset:8
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v26
-; GFX1250-NEXT:    s_wait_loadcnt 0x1a
-; GFX1250-NEXT:    v_dual_cndmask_b32 v26, v36, v37, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v24
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v37, 16, v37 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
-; GFX1250-NEXT:    s_wait_loadcnt 0x18
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v36, 16, v36 :: v_dual_cndmask_b32 v24, v38, v39, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v22
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v38, 16, v38 :: v_dual_bitop2_b32 v7, 1, v7 bitop3:0x40
-; GFX1250-NEXT:    s_wait_loadcnt 0x16
-; GFX1250-NEXT:    v_dual_cndmask_b32 v22, v48, v49 :: v_dual_lshrrev_b32 v39, 16, v39
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v20
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v49, 16, v49 :: v_dual_bitop2_b32 v8, 1, v8 bitop3:0x40
-; GFX1250-NEXT:    s_wait_loadcnt 0x14
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v48, 16, v48 :: v_dual_cndmask_b32 v20, v50, v51, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v18
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v51, 16, v51 :: v_dual_bitop2_b32 v12, 1, v12 bitop3:0x40
-; GFX1250-NEXT:    s_wait_loadcnt 0x12
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v50, 16, v50 :: v_dual_cndmask_b32 v18, v52, v53, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v53, 16, v53 :: v_dual_bitop2_b32 v14, 1, v14 bitop3:0x40
-; GFX1250-NEXT:    s_wait_loadcnt 0x10
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v52, 16, v52 :: v_dual_cndmask_b32 v16, v54, v55, vcc_lo
-; GFX1250-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v14
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v55, 16, v55 :: v_dual_lshrrev_b32 v54, 16, v54
-; GFX1250-NEXT:    s_wait_loadcnt 0xc
-; GFX1250-NEXT:    v_cndmask_b32_e32 v14, v66, v67, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v67, 16, v67 :: v_dual_lshrrev_b32 v66, 16, v66
-; GFX1250-NEXT:    s_wait_loadcnt 0x8
-; GFX1250-NEXT:    v_cndmask_b32_e32 v12, v70, v71, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v10
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v70, 16, v70 :: v_dual_bitop2_b32 v25, 1, v25 bitop3:0x40
-; GFX1250-NEXT:    s_wait_loadcnt 0x5
-; GFX1250-NEXT:    v_dual_cndmask_b32 v10, v81, v82 :: v_dual_lshrrev_b32 v71, 16, v71
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v82, 16, v82 :: v_dual_bitop2_b32 v27, 1, v27 bitop3:0x40
-; GFX1250-NEXT:    v_dual_cndmask_b32 v8, v69, v80 :: v_dual_lshrrev_b32 v81, 16, v81
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v80, 16, v80 :: v_dual_lshrrev_b32 v69, 16, v69
-; GFX1250-NEXT:    s_wait_loadcnt 0x4
-; GFX1250-NEXT:    v_dual_cndmask_b32 v6, v68, v29 :: v_dual_lshrrev_b32 v29, 16, v29
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v68, 16, v68 :: v_dual_cndmask_b32 v4, v64, v65, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v65, 16, v65 :: v_dual_lshrrev_b32 v64, 16, v64
-; GFX1250-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-NEXT:    v_dual_cndmask_b32 v2, v32, v33 :: v_dual_lshrrev_b32 v33, 16, v33
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v32, 16, v32 :: v_dual_cndmask_b32 v0, v35, v83, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v27
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v83, 16, v83 :: v_dual_cndmask_b32 v27, v36, v37, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v25
-; GFX1250-NEXT:    v_cndmask_b32_e32 v25, v38, v39, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v23
-; GFX1250-NEXT:    v_dual_lshrrev_b32 v35, 16, v35 :: v_dual_cndmask_b32 v23, v48, v49, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v21
-; GFX1250-NEXT:    v_cndmask_b32_e32 v21, v50, v51, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v19
-; GFX1250-NEXT:    v_cndmask_b32_e32 v19, v52, v53, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v17
-; GFX1250-NEXT:    v_cndmask_b32_e32 v17, v54, v55, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
-; GFX1250-NEXT:    v_cndmask_b32_e32 v15, v66, v67, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v13
-; GFX1250-NEXT:    v_cndmask_b32_e32 v13, v70, v71, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v11
-; GFX1250-NEXT:    v_cndmask_b32_e32 v11, v81, v82, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
-; GFX1250-NEXT:    v_cndmask_b32_e32 v7, v68, v29, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1250-NEXT:    v_cndmask_b32_e32 v3, v32, v33, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1250-NEXT:    v_cndmask_b32_e32 v1, v35, v83, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX1250-NEXT:    v_cndmask_b32_e32 v5, v64, v65, vcc_lo
-; GFX1250-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
-; GFX1250-NEXT:    v_cndmask_b32_e32 v9, v69, v80, vcc_lo
-; GFX1250-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v4, v9, v8, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v5, v11, v10, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v6, v13, v12, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v7, v15, v14, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v8, v17, v16, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v9, v19, v18, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v10, v21, v20, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v11, v23, v22, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v12, v25, v24, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v13, v27, v26, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v14, v28, v31, 0x5040100
-; GFX1250-NEXT:    v_perm_b32 v15, v34, v30, 0x5040100
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_vselect_v32bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    s_clause 0x20
+; GFX1250TRUE16-NEXT:    scratch_load_u16 v31, off, s32
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v32, off, s32 offset:68
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v33, off, s32 offset:72
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v34, off, s32 offset:76
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v35, off, s32 offset:124
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v36, off, s32 offset:128
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v37, off, s32 offset:64
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v38, off, s32 offset:60
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v39, off, s32 offset:120
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v48, off, s32 offset:56
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v49, off, s32 offset:116
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v50, off, s32 offset:52
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v51, off, s32 offset:112
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v52, off, s32 offset:48
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v53, off, s32 offset:108
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v54, off, s32 offset:44
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v55, off, s32 offset:104
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v64, off, s32 offset:40
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v65, off, s32 offset:100
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v66, off, s32 offset:36
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v67, off, s32 offset:96
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v68, off, s32 offset:32
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v69, off, s32 offset:92
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v70, off, s32 offset:28
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v71, off, s32 offset:88
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v80, off, s32 offset:24
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v81, off, s32 offset:84
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v82, off, s32 offset:20
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v83, off, s32 offset:80
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v84, off, s32 offset:16
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v85, off, s32 offset:12
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v86, off, s32 offset:8
+; GFX1250TRUE16-NEXT:    scratch_load_b32 v87, off, s32 offset:4
+; GFX1250TRUE16-NEXT:    v_and_b16 v0.h, 1, v1.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v0.l, 1, v0.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v1.l, 1, v3.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v1.h, 1, v2.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v2.l, 1, v9.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 1, v0.h
+; GFX1250TRUE16-NEXT:    v_and_b16 v0.h, 1, v4.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s1, 1, v0.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s0, 1, v1.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s2, 1, v1.h
+; GFX1250TRUE16-NEXT:    v_and_b16 v0.l, 1, v5.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v1.l, 1, v7.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v1.h, 1, v6.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v2.h, 1, v8.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v3.l, 1, v11.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v3.h, 1, v10.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v4.l, 1, v13.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v4.h, 1, v12.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v5.l, 1, v15.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v5.h, 1, v14.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v6.l, 1, v17.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v6.h, 1, v16.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v7.l, 1, v19.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v7.h, 1, v18.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v8.l, 1, v21.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v8.h, 1, v20.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v9.l, 1, v23.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v9.h, 1, v22.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v10.l, 1, v25.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v10.h, 1, v24.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v11.l, 1, v27.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v11.h, 1, v26.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v12.l, 1, v29.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v12.h, 1, v28.l
+; GFX1250TRUE16-NEXT:    v_and_b16 v13.l, 1, v30.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s4, 1, v0.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s3, 1, v0.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s5, 1, v1.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s6, 1, v1.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s7, 1, v2.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s8, 1, v2.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s9, 1, v3.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s10, 1, v3.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s11, 1, v4.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s12, 1, v4.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s13, 1, v5.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s14, 1, v5.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s15, 1, v6.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s16, 1, v6.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s17, 1, v7.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s18, 1, v7.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s19, 1, v8.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s20, 1, v8.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s21, 1, v9.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s22, 1, v9.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s23, 1, v10.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s24, 1, v10.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s25, 1, v11.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s26, 1, v13.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s27, 1, v12.h
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s28, 1, v12.l
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s29, 1, v11.h
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0x20
+; GFX1250TRUE16-NEXT:    v_and_b16 v0.h, 1, v31.l
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0x1a
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v15.l, v36.l, v37.l, s26
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0x19
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v14.l, v35.l, v38.l, s27
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v14.h, v35.h, v38.h, s28
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0x17
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v13.l, v39.l, v48.l, s29
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v13.h, v39.h, v48.h, s25
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0x15
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v12.l, v49.l, v50.l, s24
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v12.h, v49.h, v50.h, s23
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0x13
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v11.l, v51.l, v52.l, s22
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v11.h, v51.h, v52.h, s21
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0x11
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v10.l, v53.l, v54.l, s20
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v10.h, v53.h, v54.h, s19
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0xf
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v9.l, v55.l, v64.l, s18
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v9.h, v55.h, v64.h, s17
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0xd
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v8.l, v65.l, v66.l, s16
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v8.h, v65.h, v66.h, s15
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0xb
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v7.l, v67.l, v68.l, s14
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v7.h, v67.h, v68.h, s13
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0x9
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v6.l, v69.l, v70.l, s12
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v6.h, v69.h, v70.h, s11
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0x7
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v5.l, v71.l, v80.l, s10
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v5.h, v71.h, v80.h, s9
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0x5
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v4.l, v81.l, v82.l, s8
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v4.h, v81.h, v82.h, s7
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0x3
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v3.l, v83.l, v84.l, s6
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0x2
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v2.l, v34.l, v85.l, s4
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0x1
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v1.l, v33.l, v86.l, s2
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.l, v32.l, v87.l, s1
+; GFX1250TRUE16-NEXT:    v_cmp_eq_u16_e64 s1, 1, v0.h
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v0.h, v32.h, v87.h, vcc_lo
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v1.h, v33.h, v86.h, s0
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v2.h, v34.h, v85.h, s3
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v3.h, v83.h, v84.h, s5
+; GFX1250TRUE16-NEXT:    v_cndmask_b16 v15.h, v36.h, v37.h, s1
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_vselect_v32bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    s_clause 0x1b
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v31, off, s32 offset:60
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v32, off, s32 offset:124
+; GFX1250FAKE16-NEXT:    scratch_load_u16 v33, off, s32
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v34, off, s32 offset:128
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v35, off, s32 offset:64
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v36, off, s32 offset:120
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v37, off, s32 offset:56
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v38, off, s32 offset:116
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v39, off, s32 offset:52
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v48, off, s32 offset:112
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v49, off, s32 offset:48
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v50, off, s32 offset:108
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v51, off, s32 offset:44
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v52, off, s32 offset:104
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v53, off, s32 offset:40
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v54, off, s32 offset:100
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v55, off, s32 offset:36
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v64, off, s32 offset:76
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v65, off, s32 offset:12
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v66, off, s32 offset:96
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v67, off, s32 offset:32
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v68, off, s32 offset:80
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v69, off, s32 offset:84
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v70, off, s32 offset:92
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v71, off, s32 offset:28
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v80, off, s32 offset:20
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v81, off, s32 offset:88
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v82, off, s32 offset:24
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v30, 1, v30
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v29, 1, v29
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v26, 1, v26
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v24, 1, v24
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v22, 1, v22
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v20, 1, v20
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v18, 1, v18
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v16, 1, v16
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v10, 1, v10
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v6, 1, v6
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v4, 1, v4
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v1, 1, v1
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v3, 1, v3
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v5, 1, v5
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v23, 1, v23
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v9, 1, v9
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v13, 1, v13
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v15, 1, v15
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v21, 1, v21
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v11, 1, v11
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v19, 1, v19
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt 0x1a
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v83, 16, v32 :: v_dual_bitop2_b32 v17, 1, v17 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e64 s1, 1, v30
+; GFX1250FAKE16-NEXT:    v_and_b32_e32 v28, 1, v28
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt 0x17
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_dual_cndmask_b32 v30, v34, v35, s1 :: v_dual_bitop2_b32 v33, 1, v33 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v28
+; GFX1250FAKE16-NEXT:    v_lshrrev_b32_e32 v28, 16, v31
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e64 s0, 1, v29
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v29, off, s32 offset:16
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v35, 16, v35 :: v_dual_lshrrev_b32 v34, 16, v34
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v31, v32, v31, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v33
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v32, off, s32 offset:72
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e64 v28, v83, v28, s0
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v83, off, s32 offset:4
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v34, v34, v35, vcc_lo
+; GFX1250FAKE16-NEXT:    s_clause 0x1
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v35, off, s32 offset:68
+; GFX1250FAKE16-NEXT:    scratch_load_b32 v33, off, s32 offset:8
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v26
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt 0x1a
+; GFX1250FAKE16-NEXT:    v_dual_cndmask_b32 v26, v36, v37, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v24
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v37, 16, v37 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt 0x18
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v36, 16, v36 :: v_dual_cndmask_b32 v24, v38, v39, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v22
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v38, 16, v38 :: v_dual_bitop2_b32 v7, 1, v7 bitop3:0x40
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt 0x16
+; GFX1250FAKE16-NEXT:    v_dual_cndmask_b32 v22, v48, v49 :: v_dual_lshrrev_b32 v39, 16, v39
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v20
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v49, 16, v49 :: v_dual_bitop2_b32 v8, 1, v8 bitop3:0x40
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt 0x14
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v48, 16, v48 :: v_dual_cndmask_b32 v20, v50, v51, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v18
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v51, 16, v51 :: v_dual_bitop2_b32 v12, 1, v12 bitop3:0x40
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt 0x12
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v50, 16, v50 :: v_dual_cndmask_b32 v18, v52, v53, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v53, 16, v53 :: v_dual_bitop2_b32 v14, 1, v14 bitop3:0x40
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt 0x10
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v52, 16, v52 :: v_dual_cndmask_b32 v16, v54, v55, vcc_lo
+; GFX1250FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v14
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v55, 16, v55 :: v_dual_lshrrev_b32 v54, 16, v54
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt 0xc
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v14, v66, v67, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v67, 16, v67 :: v_dual_lshrrev_b32 v66, 16, v66
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt 0x8
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v12, v70, v71, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v10
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v70, 16, v70 :: v_dual_bitop2_b32 v25, 1, v25 bitop3:0x40
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt 0x5
+; GFX1250FAKE16-NEXT:    v_dual_cndmask_b32 v10, v81, v82 :: v_dual_lshrrev_b32 v71, 16, v71
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v82, 16, v82 :: v_dual_bitop2_b32 v27, 1, v27 bitop3:0x40
+; GFX1250FAKE16-NEXT:    v_dual_cndmask_b32 v8, v69, v80 :: v_dual_lshrrev_b32 v81, 16, v81
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v80, 16, v80 :: v_dual_lshrrev_b32 v69, 16, v69
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt 0x4
+; GFX1250FAKE16-NEXT:    v_dual_cndmask_b32 v6, v68, v29 :: v_dual_lshrrev_b32 v29, 16, v29
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v68, 16, v68 :: v_dual_cndmask_b32 v4, v64, v65, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v65, 16, v65 :: v_dual_lshrrev_b32 v64, 16, v64
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250FAKE16-NEXT:    v_dual_cndmask_b32 v2, v32, v33 :: v_dual_lshrrev_b32 v33, 16, v33
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v32, 16, v32 :: v_dual_cndmask_b32 v0, v35, v83, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v27
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v83, 16, v83 :: v_dual_cndmask_b32 v27, v36, v37, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v25
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v25, v38, v39, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v23
+; GFX1250FAKE16-NEXT:    v_dual_lshrrev_b32 v35, 16, v35 :: v_dual_cndmask_b32 v23, v48, v49, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v21
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v21, v50, v51, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v19
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v19, v52, v53, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v17
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v17, v54, v55, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v15, v66, v67, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v13
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v13, v70, v71, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v11
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v11, v81, v82, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v7, v68, v29, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v3, v32, v33, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v1, v35, v83, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v5, v64, v65, vcc_lo
+; GFX1250FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
+; GFX1250FAKE16-NEXT:    v_cndmask_b32_e32 v9, v69, v80, vcc_lo
+; GFX1250FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v4, v9, v8, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v5, v11, v10, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v6, v13, v12, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v7, v15, v14, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v8, v17, v16, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v9, v19, v18, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v10, v21, v20, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v11, v23, v22, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v12, v25, v24, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v13, v27, v26, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v14, v28, v31, 0x5040100
+; GFX1250FAKE16-NEXT:    v_perm_b32 v15, v34, v30, 0x5040100
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = select <32 x i1> %cond, <32 x bfloat> %a, <32 x bfloat> %b
   ret <32 x bfloat> %op
 }
@@ -49167,12 +50464,21 @@ define bfloat @v_fma_bf16(bfloat %a, bfloat %b, bfloat %c) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fma_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_fma_mixlo_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fma_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_fma_mix_f32_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fma_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_fma_mixlo_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
   ret bfloat %op
 }
@@ -54791,12 +56097,21 @@ define bfloat @v_fmuladd_bf16(bfloat %a, bfloat %b, bfloat %c) {
 ; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX1250-LABEL: v_fmuladd_bf16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    v_fma_mixlo_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fmuladd_bf16:
+; GFX1250TRUE16:       ; %bb.0:
+; GFX1250TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT:    v_fma_mix_f32_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX1250TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT:    v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT:    s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fmuladd_bf16:
+; GFX1250FAKE16:       ; %bb.0:
+; GFX1250FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT:    v_fma_mixlo_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX1250FAKE16-NEXT:    s_set_pc_i64 s[30:31]
   %op = call bfloat @llvm.fmuladd.bf16(bfloat %a, bfloat %b, bfloat %c)
   ret bfloat %op
 }
@@ -55652,5 +56967,3 @@ define <4 x bfloat> @v_fmuladd_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfl
   %op = call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c)
   ret <4 x bfloat> %op
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX1250FAKE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
index 363a248..cbf6b66 100644
--- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
+++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -1262,7 +1262,7 @@ define amdgpu_ps void @ps_mesa_i16(i16 %arg0) {
 ; GFX1250-TRUE16-LABEL: ps_mesa_i16:
 ; GFX1250-TRUE16:       ; %bb.0:
 ; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v0.l, v0.l, v0.l
-; GFX1250-TRUE16-NEXT:    flat_store_b16 v[0:1], v0
+; GFX1250-TRUE16-NEXT:    global_store_b16 v[0:1], v0, off
 ; GFX1250-TRUE16-NEXT:    s_endpgm
 ;
 ; GFX1250-FAKE16-LABEL: ps_mesa_i16:
@@ -3013,7 +3013,7 @@ define amdgpu_cs void @amdgpu_cs_v8i1(<8 x i1> %arg0) {
 ; GFX1250-TRUE16-NEXT:    v_lshlrev_b16 v0.h, 4, v0.h
 ; GFX1250-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1250-TRUE16-NEXT:    v_bitop3_b16 v0.l, v0.l, v0.h, 15 bitop3:0xec
-; GFX1250-TRUE16-NEXT:    flat_store_b8 v[0:1], v0
+; GFX1250-TRUE16-NEXT:    global_store_b8 v[0:1], v0, off
 ; GFX1250-TRUE16-NEXT:    s_endpgm
 ;
 ; GFX1250-FAKE16-LABEL: amdgpu_cs_v8i1:
@@ -3297,7 +3297,7 @@ define amdgpu_cs void @amdgpu_cs_v16i1(<16 x i1> %arg0) {
 ; GFX1250-TRUE16-NEXT:    v_or_b16 v0.h, v1.h, v1.l
 ; GFX1250-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1250-TRUE16-NEXT:    v_bitop3_b16 v0.l, v0.l, v0.h, 0xff bitop3:0xec
-; GFX1250-TRUE16-NEXT:    flat_store_b16 v[0:1], v0
+; GFX1250-TRUE16-NEXT:    global_store_b16 v[0:1], v0, off
 ; GFX1250-TRUE16-NEXT:    s_endpgm
 ;
 ; GFX1250-FAKE16-LABEL: amdgpu_cs_v16i1:
diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
index f706f53..eb40e5c 100644
--- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
@@ -35,6 +35,6 @@ define amdgpu_kernel void @test_direct_indirect_call() {
   ret void
 }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
index 8da204b..c02ff28 100644
--- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
+++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
@@ -28,6 +28,6 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 {
 attributes #0 = { "amdgpu-no-dispatch-id" }
 
 ;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index ab51693..05d3e9c3 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -497,12 +497,10 @@ define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(ptr addrspace
   ret void
 }
 
-; FIXME: Should there be more checks here? minnum with NaN operand is simplified away.
+; FIXME: Should there be more checks here? minnum with sNaN operand is simplified to qNaN.
 
 ; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32:
-; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]]
-; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[LOAD]]
-; GFX9: v_max_f32_e32 v{{[0-9]+}}, [[LOAD]], [[LOAD]]
+; GCN: v_mov_b32_e32 v{{.+}}, 0x7fc00000
 define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(ptr addrspace(1) %arg) {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 3de6df2..833be20 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -1949,8 +1949,7 @@ define float @v_fneg_self_minimumnum_f32_ieee(float %a) #0 {
 ; GCN-LABEL: v_fneg_self_minimumnum_f32_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT:    v_max_f32_e32 v0, v0, v0
+; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimumnum.f32(float %a, float %a)
   %min.fneg = fneg float %min
@@ -1961,7 +1960,7 @@ define float @v_fneg_self_minimumnum_f32_no_ieee(float %a) #4 {
 ; GCN-LABEL: v_fneg_self_minimumnum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %min = call float @llvm.minimumnum.f32(float %a, float %a)
   %min.fneg = fneg float %min
@@ -2285,8 +2284,7 @@ define float @v_fneg_self_maximumnum_f32_ieee(float %a) #0 {
 ; GCN-LABEL: v_fneg_self_maximumnum_f32_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT:    v_min_f32_e32 v0, v0, v0
+; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximumnum.f32(float %a, float %a)
   %max.fneg = fneg float %max
@@ -2297,7 +2295,7 @@ define float @v_fneg_self_maximumnum_f32_no_ieee(float %a) #4 {
 ; GCN-LABEL: v_fneg_self_maximumnum_f32_no_ieee:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_min_f32_e64 v0, -v0, -v0
+; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %max = call float @llvm.maximumnum.f32(float %a, float %a)
   %max.fneg = fneg float %max
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
index 40d2765..b0dd187 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -11,9 +11,9 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-FAKE16 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-TRUE16 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-FAKE16 %s
-;  TODO: FIXME-TRUE16 llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=0 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-SDAG-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=0 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-SDAG-TRUE16 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=0 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-SDAG-FAKE16 %s
-; TODO: FIXME-TRUE16  llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-GISEL-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-GISEL-TRUE16 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-GISEL-FAKE16 %s
 
 define amdgpu_kernel void @fptrunc_f32_to_f16(
@@ -197,6 +197,24 @@ define amdgpu_kernel void @fptrunc_f32_to_f16(
 ; GFX11-GISEL-FAKE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
 ; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16:
+; GFX1250-SDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16:
 ; GFX1250-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-SDAG-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -215,6 +233,21 @@ define amdgpu_kernel void @fptrunc_f32_to_f16(
 ; GFX1250-SDAG-FAKE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
 ; GFX1250-SDAG-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16:
+; GFX1250-GISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT:    buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16:
 ; GFX1250-GISEL-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -419,6 +452,24 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_afn(ptr addrspace(1) %r,
 ; GFX11-GISEL-FAKE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
 ; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16_afn:
+; GFX1250-SDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16_afn:
 ; GFX1250-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-SDAG-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -437,6 +488,21 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_afn(ptr addrspace(1) %r,
 ; GFX1250-SDAG-FAKE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
 ; GFX1250-SDAG-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16_afn:
+; GFX1250-GISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT:    buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16_afn:
 ; GFX1250-GISEL-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -1160,6 +1226,73 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
 ; GFX11-GISEL-FAKE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
 ; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f64_to_f16:
+; GFX1250-SDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_load_b64 v[0:1], off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    v_readfirstlane_b32 s2, v1
+; GFX1250-SDAG-TRUE16-NEXT:    s_and_b32 s3, s2, 0x1ff
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshr_b32 s5, s2, 8
+; GFX1250-SDAG-TRUE16-NEXT:    v_or_b32_e32 v0, s3, v0
+; GFX1250-SDAG-TRUE16-NEXT:    s_bfe_u32 s3, s2, 0xb0014
+; GFX1250-SDAG-TRUE16-NEXT:    s_and_b32 s5, s5, 0xffe
+; GFX1250-SDAG-TRUE16-NEXT:    s_sub_co_i32 s4, 0x3f1, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1250-SDAG-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX1250-SDAG-TRUE16-NEXT:    v_med3_i32 v1, s4, 0, 13
+; GFX1250-SDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250-SDAG-TRUE16-NEXT:    v_readfirstlane_b32 s8, v1
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    v_readfirstlane_b32 s4, v0
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s4, s5, s4
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s5, s4, 0x1000
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshr_b32 s9, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshl_b32 s8, s9, s8
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_lg_u32 s8, s5
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT:    s_addk_co_i32 s3, 0xfc10
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s5, s9, s5
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshl_b32 s8, s3, 12
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s8, s4, s8
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_lt_i32 s3, 1
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s5, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    s_and_b32 s8, s5, 7
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_gt_i32 s8, 5
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s9, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_eq_u32 s8, 3
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s8, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshr_b32 s5, s5, 2
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s8, s8, s9
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    s_add_co_i32 s5, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_lt_i32 s3, 31
+; GFX1250-SDAG-TRUE16-NEXT:    s_movk_i32 s8, 0x7e00
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s5, s5, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_lg_u32 s4, 0
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s4, s8, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_eq_u32 s3, 0x40f
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s3, s4, s5
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshr_b32 s2, s2, 16
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT:    s_and_b32 s2, s2, 0x8000
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s2, s2, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f64_to_f16:
 ; GFX1250-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-SDAG-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -1227,6 +1360,63 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
 ; GFX1250-SDAG-FAKE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
 ; GFX1250-SDAG-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f64_to_f16:
+; GFX1250-GISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_and_b32 s6, s3, 0x1ff
+; GFX1250-GISEL-TRUE16-NEXT:    s_bfe_u32 s4, s3, 0xb0014
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshr_b32 s5, s3, 8
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s2, s6, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_addk_co_i32 s4, 0xfc10
+; GFX1250-GISEL-TRUE16-NEXT:    s_and_b32 s5, s5, 0xffe
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s2, s5, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_sub_co_i32 s6, 1, s4
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s8, s2, 0x1000
+; GFX1250-GISEL-TRUE16-NEXT:    s_max_i32 s6, s6, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshl_b32 s7, s4, 12
+; GFX1250-GISEL-TRUE16-NEXT:    s_min_i32 s6, s6, 13
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshl_b32 s5, s5, 9
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshr_b32 s9, s8, s6
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s2, s2, s7
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshl_b32 s6, s9, s6
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s5, s5, 0x7c00
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_lg_u32 s6, s8
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s6, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s6, s9, s6
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_lt_i32 s4, 1
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s2, s6, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_and_b32 s6, s2, 7
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshr_b32 s2, s2, 2
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_eq_u32 s6, 3
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s7, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_gt_i32 s6, 5
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s6, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s6, s7, s6
+; GFX1250-GISEL-TRUE16-NEXT:    s_add_co_i32 s2, s2, s6
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_gt_i32 s4, 30
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s2, 0x7c00, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_eq_u32 s4, 0x40f
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s2, s5, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshr_b32 s3, s3, 16
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT:    s_and_b32 s3, s3, 0x8000
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s2, s3, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT:    buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16:
 ; GFX1250-GISEL-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -1489,6 +1679,26 @@ define amdgpu_kernel void @fptrunc_f64_to_f16_afn(
 ; GFX11-GISEL-FAKE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
 ; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f64_to_f16_afn:
+; GFX1250-SDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_load_b64 v[0:1], off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f64_to_f16_afn:
 ; GFX1250-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-SDAG-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -1509,6 +1719,20 @@ define amdgpu_kernel void @fptrunc_f64_to_f16_afn(
 ; GFX1250-SDAG-FAKE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
 ; GFX1250-SDAG-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f64_to_f16_afn:
+; GFX1250-GISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-GISEL-TRUE16-NEXT:    buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16_afn:
 ; GFX1250-GISEL-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -1740,6 +1964,24 @@ define amdgpu_kernel void @fptrunc_v2f32_to_v2f16(
 ; GFX11-GISEL-FAKE16-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
 ; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_v2f32_to_v2f16:
+; GFX1250-SDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_load_b64 v[0:1], off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    v_cvt_pk_f16_f32 v0, v0, v1
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-SDAG-FAKE16-LABEL: fptrunc_v2f32_to_v2f16:
 ; GFX1250-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-SDAG-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -1758,6 +2000,20 @@ define amdgpu_kernel void @fptrunc_v2f32_to_v2f16(
 ; GFX1250-SDAG-FAKE16-NEXT:    buffer_store_b32 v0, off, s[4:7], null
 ; GFX1250-SDAG-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_v2f32_to_v2f16:
+; GFX1250-GISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    v_mov_b64_e32 v[0:1], s[2:3]
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-TRUE16-NEXT:    v_cvt_pk_f16_f32 v0, v0, v1
+; GFX1250-GISEL-TRUE16-NEXT:    buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-GISEL-FAKE16-LABEL: fptrunc_v2f32_to_v2f16:
 ; GFX1250-GISEL-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -3017,6 +3273,122 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
 ; GFX11-GISEL-FAKE16-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
 ; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX1250-SDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_load_b128 v[0:3], off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    v_readfirstlane_b32 s2, v3
+; GFX1250-SDAG-TRUE16-NEXT:    s_and_b32 s3, s2, 0x1ff
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshr_b32 s5, s2, 8
+; GFX1250-SDAG-TRUE16-NEXT:    v_or_b32_e32 v2, s3, v2
+; GFX1250-SDAG-TRUE16-NEXT:    s_bfe_u32 s3, s2, 0xb0014
+; GFX1250-SDAG-TRUE16-NEXT:    s_and_b32 s5, s5, 0xffe
+; GFX1250-SDAG-TRUE16-NEXT:    s_sub_co_i32 s4, 0x3f1, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1250-SDAG-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; GFX1250-SDAG-TRUE16-NEXT:    v_med3_i32 v3, s4, 0, 13
+; GFX1250-SDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1250-SDAG-TRUE16-NEXT:    v_readfirstlane_b32 s8, v3
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    v_readfirstlane_b32 s4, v2
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s4, s5, s4
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s5, s4, 0x1000
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshr_b32 s9, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshl_b32 s8, s9, s8
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_lg_u32 s8, s5
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT:    s_addk_co_i32 s3, 0xfc10
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s5, s9, s5
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshl_b32 s8, s3, 12
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s8, s4, s8
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_lt_i32 s3, 1
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s5, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    s_and_b32 s8, s5, 7
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_gt_i32 s8, 5
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s9, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_eq_u32 s8, 3
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s8, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshr_b32 s5, s5, 2
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s8, s8, s9
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    s_add_co_i32 s5, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_lt_i32 s3, 31
+; GFX1250-SDAG-TRUE16-NEXT:    s_movk_i32 s8, 0x7e00
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s5, s5, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_lg_u32 s4, 0
+; GFX1250-SDAG-TRUE16-NEXT:    v_readfirstlane_b32 s4, v1
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s9, s8, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_eq_u32 s3, 0x40f
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s3, s9, s5
+; GFX1250-SDAG-TRUE16-NEXT:    s_and_b32 s5, s4, 0x1ff
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshr_b32 s10, s4, 8
+; GFX1250-SDAG-TRUE16-NEXT:    v_or_b32_e32 v0, s5, v0
+; GFX1250-SDAG-TRUE16-NEXT:    s_bfe_u32 s5, s4, 0xb0014
+; GFX1250-SDAG-TRUE16-NEXT:    s_and_b32 s10, s10, 0xffe
+; GFX1250-SDAG-TRUE16-NEXT:    s_sub_co_i32 s9, 0x3f1, s5
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshr_b32 s2, s2, 16
+; GFX1250-SDAG-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX1250-SDAG-TRUE16-NEXT:    v_med3_i32 v1, s9, 0, 13
+; GFX1250-SDAG-TRUE16-NEXT:    s_and_b32 s2, s2, 0x8000
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s2, s2, s3
+; GFX1250-SDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250-SDAG-TRUE16-NEXT:    v_readfirstlane_b32 s11, v1
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    v_readfirstlane_b32 s9, v0
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s9, s10, s9
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s10, s9, 0x1000
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshr_b32 s12, s10, s11
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshl_b32 s11, s12, s11
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_lg_u32 s11, s10
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s3, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT:    s_addk_co_i32 s5, 0xfc10
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s3, s12, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshl_b32 s10, s5, 12
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s10, s9, s10
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_lt_i32 s5, 1
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s3, s3, s10
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    s_and_b32 s10, s3, 7
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_gt_i32 s10, 5
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s11, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_eq_u32 s10, 3
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s10, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshr_b32 s3, s3, 2
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s10, s10, s11
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    s_add_co_i32 s3, s3, s10
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_lt_i32 s5, 31
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s3, s3, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_lg_u32 s9, 0
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s8, s8, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT:    s_cmp_eq_u32 s5, 0x40f
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT:    s_cselect_b32 s3, s8, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_lshr_b32 s4, s4, 16
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    s_and_b32 s4, s4, 0x8000
+; GFX1250-SDAG-TRUE16-NEXT:    s_or_b32 s3, s4, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT:    s_pack_ll_b32_b16 s2, s3, s2
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-SDAG-FAKE16-LABEL: fptrunc_v2f64_to_v2f16:
 ; GFX1250-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-SDAG-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -3133,6 +3505,109 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
 ; GFX1250-SDAG-FAKE16-NEXT:    buffer_store_b32 v0, off, s[4:7], null
 ; GFX1250-SDAG-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX1250-GISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_and_b32 s8, s5, 0x1ff
+; GFX1250-GISEL-TRUE16-NEXT:    s_bfe_u32 s2, s5, 0xb0014
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshr_b32 s3, s5, 8
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s4, s8, s4
+; GFX1250-GISEL-TRUE16-NEXT:    s_addk_co_i32 s2, 0xfc10
+; GFX1250-GISEL-TRUE16-NEXT:    s_and_b32 s3, s3, 0xffe
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_lg_u32 s4, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s4, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s3, s3, s4
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s4, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_sub_co_i32 s8, 1, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s10, s3, 0x1000
+; GFX1250-GISEL-TRUE16-NEXT:    s_max_i32 s8, s8, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshl_b32 s9, s2, 12
+; GFX1250-GISEL-TRUE16-NEXT:    s_min_i32 s8, s8, 13
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshl_b32 s4, s4, 9
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshr_b32 s11, s10, s8
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s3, s3, s9
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshl_b32 s8, s11, s8
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s4, s4, 0x7c00
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_lg_u32 s8, s10
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s8, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s8, s11, s8
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_lt_i32 s2, 1
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s3, s8, s3
+; GFX1250-GISEL-TRUE16-NEXT:    s_and_b32 s8, s3, 7
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshr_b32 s3, s3, 2
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_eq_u32 s8, 3
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s9, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_gt_i32 s8, 5
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s8, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s8, s9, s8
+; GFX1250-GISEL-TRUE16-NEXT:    s_add_co_i32 s3, s3, s8
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_gt_i32 s2, 30
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s3, 0x7c00, s3
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_eq_u32 s2, 0x40f
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s2, s4, s3
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshr_b32 s3, s5, 16
+; GFX1250-GISEL-TRUE16-NEXT:    s_and_b32 s8, s7, 0x1ff
+; GFX1250-GISEL-TRUE16-NEXT:    s_bfe_u32 s4, s7, 0xb0014
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshr_b32 s5, s7, 8
+; GFX1250-GISEL-TRUE16-NEXT:    s_and_b32 s3, s3, 0x8000
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s6, s8, s6
+; GFX1250-GISEL-TRUE16-NEXT:    s_addk_co_i32 s4, 0xfc10
+; GFX1250-GISEL-TRUE16-NEXT:    s_and_b32 s5, s5, 0xffe
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s2, s3, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_lg_u32 s6, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s3, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s3, s5, s3
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s5, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_sub_co_i32 s6, 1, s4
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s9, s3, 0x1000
+; GFX1250-GISEL-TRUE16-NEXT:    s_max_i32 s6, s6, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshl_b32 s8, s4, 12
+; GFX1250-GISEL-TRUE16-NEXT:    s_min_i32 s6, s6, 13
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshl_b32 s5, s5, 9
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshr_b32 s10, s9, s6
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s3, s3, s8
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshl_b32 s6, s10, s6
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s5, s5, 0x7c00
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_lg_u32 s6, s9
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s6, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s6, s10, s6
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_lt_i32 s4, 1
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s3, s6, s3
+; GFX1250-GISEL-TRUE16-NEXT:    s_and_b32 s6, s3, 7
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshr_b32 s3, s3, 2
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_eq_u32 s6, 3
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s8, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_gt_i32 s6, 5
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s6, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s6, s8, s6
+; GFX1250-GISEL-TRUE16-NEXT:    s_add_co_i32 s3, s3, s6
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_gt_i32 s4, 30
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s3, 0x7c00, s3
+; GFX1250-GISEL-TRUE16-NEXT:    s_cmp_eq_u32 s4, 0x40f
+; GFX1250-GISEL-TRUE16-NEXT:    s_cselect_b32 s3, s5, s3
+; GFX1250-GISEL-TRUE16-NEXT:    s_lshr_b32 s4, s7, 16
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT:    s_and_b32 s4, s4, 0x8000
+; GFX1250-GISEL-TRUE16-NEXT:    s_or_b32 s3, s4, s3
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT:    s_pack_ll_b32_b16 s2, s2, s3
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT:    buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-GISEL-FAKE16-LABEL: fptrunc_v2f64_to_v2f16:
 ; GFX1250-GISEL-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -3481,6 +3956,27 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16_afn(
 ; GFX11-GISEL-FAKE16-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
 ; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_v2f64_to_v2f16_afn:
+; GFX1250-SDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_load_b128 v[0:3], off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    v_cvt_f32_f64_e32 v2, v[2:3]
+; GFX1250-SDAG-TRUE16-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT:    v_cvt_pk_f16_f32 v0, v0, v2
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-SDAG-FAKE16-LABEL: fptrunc_v2f64_to_v2f16_afn:
 ; GFX1250-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-SDAG-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -3502,6 +3998,25 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16_afn(
 ; GFX1250-SDAG-FAKE16-NEXT:    buffer_store_b32 v0, off, s[4:7], null
 ; GFX1250-SDAG-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_v2f64_to_v2f16_afn:
+; GFX1250-GISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
+; GFX1250-GISEL-TRUE16-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-GISEL-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-GISEL-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.h, v1
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
+; GFX1250-GISEL-TRUE16-NEXT:    buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-GISEL-FAKE16-LABEL: fptrunc_v2f64_to_v2f16_afn:
 ; GFX1250-GISEL-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -3710,6 +4225,26 @@ define amdgpu_kernel void @fneg_fptrunc_f32_to_f16(
 ; GFX11-GISEL-FAKE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
 ; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-SDAG-TRUE16-LABEL: fneg_fptrunc_f32_to_f16:
+; GFX1250-SDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-SDAG-FAKE16-LABEL: fneg_fptrunc_f32_to_f16:
 ; GFX1250-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-SDAG-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -3730,6 +4265,22 @@ define amdgpu_kernel void @fneg_fptrunc_f32_to_f16(
 ; GFX1250-SDAG-FAKE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
 ; GFX1250-SDAG-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-GISEL-TRUE16-LABEL: fneg_fptrunc_f32_to_f16:
+; GFX1250-GISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_xor_b32 s2, s2, 0x80000000
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT:    s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT:    buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-GISEL-FAKE16-LABEL: fneg_fptrunc_f32_to_f16:
 ; GFX1250-GISEL-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -3936,6 +4487,26 @@ define amdgpu_kernel void @fabs_fptrunc_f32_to_f16(
 ; GFX11-GISEL-FAKE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
 ; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-SDAG-TRUE16-LABEL: fabs_fptrunc_f32_to_f16:
+; GFX1250-SDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-SDAG-FAKE16-LABEL: fabs_fptrunc_f32_to_f16:
 ; GFX1250-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-SDAG-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -3956,6 +4527,22 @@ define amdgpu_kernel void @fabs_fptrunc_f32_to_f16(
 ; GFX1250-SDAG-FAKE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
 ; GFX1250-SDAG-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-GISEL-TRUE16-LABEL: fabs_fptrunc_f32_to_f16:
+; GFX1250-GISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_bitset0_b32 s2, 31
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT:    s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT:    buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-GISEL-FAKE16-LABEL: fabs_fptrunc_f32_to_f16:
 ; GFX1250-GISEL-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -4162,6 +4749,26 @@ define amdgpu_kernel void @fneg_fabs_fptrunc_f32_to_f16(
 ; GFX11-GISEL-FAKE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
 ; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-SDAG-TRUE16-LABEL: fneg_fabs_fptrunc_f32_to_f16:
+; GFX1250-SDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    v_or_b32_e32 v0, 0x80000000, v0
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-SDAG-FAKE16-LABEL: fneg_fabs_fptrunc_f32_to_f16:
 ; GFX1250-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-SDAG-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -4182,6 +4789,22 @@ define amdgpu_kernel void @fneg_fabs_fptrunc_f32_to_f16(
 ; GFX1250-SDAG-FAKE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
 ; GFX1250-SDAG-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-GISEL-TRUE16-LABEL: fneg_fabs_fptrunc_f32_to_f16:
+; GFX1250-GISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_bitset1_b32 s2, 31
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT:    s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT:    buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-GISEL-FAKE16-LABEL: fneg_fabs_fptrunc_f32_to_f16:
 ; GFX1250-GISEL-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -4396,6 +5019,26 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
 ; GFX11-GISEL-FAKE16-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
 ; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16_zext_i32:
+; GFX1250-SDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16_zext_i32:
 ; GFX1250-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-SDAG-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -4416,6 +5059,22 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
 ; GFX1250-SDAG-FAKE16-NEXT:    buffer_store_b32 v0, off, s[4:7], null
 ; GFX1250-SDAG-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16_zext_i32:
+; GFX1250-GISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT:    s_and_b32 s2, 0xffff, s2
+; GFX1250-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT:    buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16_zext_i32:
 ; GFX1250-GISEL-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -4630,6 +5289,27 @@ define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
 ; GFX11-GISEL-FAKE16-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
 ; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
+; GFX1250-SDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-SDAG-FAKE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
 ; GFX1250-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-SDAG-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -4651,6 +5331,24 @@ define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
 ; GFX1250-SDAG-FAKE16-NEXT:    buffer_store_b32 v0, off, s[4:7], null
 ; GFX1250-SDAG-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
+; GFX1250-GISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_bitset0_b32 s2, 31
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT:    s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_and_b32 s2, 0xffff, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT:    buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-GISEL-FAKE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
 ; GFX1250-GISEL-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -4877,6 +5575,26 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_sext_i32(
 ; GFX11-GISEL-FAKE16-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
 ; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16_sext_i32:
+; GFX1250-SDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX1250-SDAG-TRUE16-NEXT:    buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16_sext_i32:
 ; GFX1250-SDAG-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-SDAG-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
@@ -4897,6 +5615,22 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_sext_i32(
 ; GFX1250-SDAG-FAKE16-NEXT:    buffer_store_b32 v0, off, s[4:7], null
 ; GFX1250-SDAG-FAKE16-NEXT:    s_endpgm
 ;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16_sext_i32:
+; GFX1250-GISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT:    s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT:    s_sext_i32_i16 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT:    buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT:    s_endpgm
+;
 ; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16_sext_i32:
 ; GFX1250-GISEL-FAKE16:       ; %bb.0: ; %entry
 ; GFX1250-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
diff --git a/llvm/test/CodeGen/AMDGPU/fsub.ll b/llvm/test/CodeGen/AMDGPU/fsub.ll
index 743431c..d6a9cb1 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub.ll
@@ -92,43 +92,11 @@ define amdgpu_kernel void @v_fneg_fsub_nsz_f32(ptr addrspace(1) %out, ptr addrsp
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_attribute_f32:
-; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
-; SI-NOT: xor
-define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
-  %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
-  %a = load float, ptr addrspace(1) %in, align 4
-  %b = load float, ptr addrspace(1) %b_ptr, align 4
-  %result = fsub float %a, %b
-  %neg.result = fsub float -0.0, %result
-  store float %neg.result, ptr addrspace(1) %out, align 4
-  ret void
-}
-
-; For some reason the attribute has a string "true" or "false", so
-; make sure it is disabled and the fneg is not folded if it is not
-; "true".
-; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_false_attribute_f32:
-; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
-; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
-define amdgpu_kernel void @v_fneg_fsub_nsz_false_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
-  %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
-  %a = load float, ptr addrspace(1) %in, align 4
-  %b = load float, ptr addrspace(1) %b_ptr, align 4
-  %result = fsub float %a, %b
-  %neg.result = fsub float -0.0, %result
-  store float %neg.result, ptr addrspace(1) %out, align 4
-  ret void
-}
-
-; FUNC-LABEL: {{^}}v_fsub_0_nsz_attribute_f32:
+; FUNC-LABEL: {{^}}v_fsub_0_nsz_flag_f32:
 ; SI-NOT: v_sub
-define amdgpu_kernel void @v_fsub_0_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_fsub_0_nsz_flag_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
   %a = load float, ptr addrspace(1) %in, align 4
-  %result = fsub float %a, 0.0
+  %result = fsub nsz float %a, 0.0
   store float %result, ptr addrspace(1) %out, align 4
   ret void
 }
-
-attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }
-attributes #1 = { nounwind "no-signed-zeros-fp-math"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
index 3089054..32f7d6b 100644
--- a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
@@ -276,23 +276,23 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo
 
 ;.
 ; V4: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; V4: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V4: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V4: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V4: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V4: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR5]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
 ;.
 ; V5: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; V5: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V5: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V5: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V5: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
 ;.
 ; V6: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; V6: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V6: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V6: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V6: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
 ;.
 ; V4: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll
index d3ef1b7..a0f5d2f 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll
@@ -68,6 +68,6 @@ if.end:
   ret void
 }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll
index 71a330e..4e952b6 100644
--- a/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll
@@ -55,8 +55,8 @@ define amdgpu_kernel void @issue120256_private(ptr addrspace(1) %out) {
 ; FIXME: Inference of amdgpu-no-queue-ptr should not depend on code object version.
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx803" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx803" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx803" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx803" "uniform-work-group-size"="false" }
 ;.
 ; CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll
index 6ccfad7..ff47563 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll
@@ -14,7 +14,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_v(<2 x half> %a, ptr addrspace(1) %ou
 ; GFX1250-SDAG-REAL16:       ; %bb.0:
 ; GFX1250-SDAG-REAL16-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
 ; GFX1250-SDAG-REAL16-NEXT:    v_cvt_pk_bf8_f16 v0.l, v0
-; GFX1250-SDAG-REAL16-NEXT:    flat_store_b16 v[2:3], v0
+; GFX1250-SDAG-REAL16-NEXT:    global_store_b16 v[2:3], v0, off
 ; GFX1250-SDAG-REAL16-NEXT:    s_endpgm
 ;
 ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_v:
@@ -28,7 +28,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_v(<2 x half> %a, ptr addrspace(1) %ou
 ; GFX1250-GISEL-REAL16:       ; %bb.0:
 ; GFX1250-GISEL-REAL16-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
 ; GFX1250-GISEL-REAL16-NEXT:    v_cvt_pk_bf8_f16 v0.l, v0
-; GFX1250-GISEL-REAL16-NEXT:    flat_store_b16 v[4:5], v0
+; GFX1250-GISEL-REAL16-NEXT:    global_store_b16 v[4:5], v0, off
 ; GFX1250-GISEL-REAL16-NEXT:    s_endpgm
 ;
 ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_v:
@@ -46,7 +46,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_s(<2 x half> inreg %a, ptr addrspace(
 ; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_bf8_f16_s:
 ; GFX1250-SDAG-REAL16:       ; %bb.0:
 ; GFX1250-SDAG-REAL16-NEXT:    v_cvt_pk_bf8_f16 v2.l, s0
-; GFX1250-SDAG-REAL16-NEXT:    flat_store_b16 v[0:1], v2
+; GFX1250-SDAG-REAL16-NEXT:    global_store_b16 v[0:1], v2, off
 ; GFX1250-SDAG-REAL16-NEXT:    s_endpgm
 ;
 ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_s:
@@ -58,7 +58,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_s(<2 x half> inreg %a, ptr addrspace(
 ; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_bf8_f16_s:
 ; GFX1250-GISEL-REAL16:       ; %bb.0:
 ; GFX1250-GISEL-REAL16-NEXT:    v_cvt_pk_bf8_f16 v2.l, s0
-; GFX1250-GISEL-REAL16-NEXT:    flat_store_b16 v[0:1], v2
+; GFX1250-GISEL-REAL16-NEXT:    global_store_b16 v[0:1], v2, off
 ; GFX1250-GISEL-REAL16-NEXT:    s_endpgm
 ;
 ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_s:
@@ -75,7 +75,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_l(ptr addrspace(1) %out) {
 ; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_bf8_f16_l:
 ; GFX1250-SDAG-REAL16:       ; %bb.0:
 ; GFX1250-SDAG-REAL16-NEXT:    v_cvt_pk_bf8_f16 v2.l, 0x56400000
-; GFX1250-SDAG-REAL16-NEXT:    flat_store_b16 v[0:1], v2
+; GFX1250-SDAG-REAL16-NEXT:    global_store_b16 v[0:1], v2, off
 ; GFX1250-SDAG-REAL16-NEXT:    s_endpgm
 ;
 ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_l:
@@ -87,7 +87,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_l(ptr addrspace(1) %out) {
 ; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_bf8_f16_l:
 ; GFX1250-GISEL-REAL16:       ; %bb.0:
 ; GFX1250-GISEL-REAL16-NEXT:    v_cvt_pk_bf8_f16 v2.l, 0x56400000
-; GFX1250-GISEL-REAL16-NEXT:    flat_store_b16 v[0:1], v2
+; GFX1250-GISEL-REAL16-NEXT:    global_store_b16 v[0:1], v2, off
 ; GFX1250-GISEL-REAL16-NEXT:    s_endpgm
 ;
 ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_l:
@@ -105,7 +105,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_v(<2 x half> %a, ptr addrspace(1) %ou
 ; GFX1250-SDAG-REAL16:       ; %bb.0:
 ; GFX1250-SDAG-REAL16-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
 ; GFX1250-SDAG-REAL16-NEXT:    v_cvt_pk_fp8_f16 v0.l, v0
-; GFX1250-SDAG-REAL16-NEXT:    flat_store_b16 v[2:3], v0
+; GFX1250-SDAG-REAL16-NEXT:    global_store_b16 v[2:3], v0, off
 ; GFX1250-SDAG-REAL16-NEXT:    s_endpgm
 ;
 ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_v:
@@ -119,7 +119,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_v(<2 x half> %a, ptr addrspace(1) %ou
 ; GFX1250-GISEL-REAL16:       ; %bb.0:
 ; GFX1250-GISEL-REAL16-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
 ; GFX1250-GISEL-REAL16-NEXT:    v_cvt_pk_fp8_f16 v0.l, v0
-; GFX1250-GISEL-REAL16-NEXT:    flat_store_b16 v[4:5], v0
+; GFX1250-GISEL-REAL16-NEXT:    global_store_b16 v[4:5], v0, off
 ; GFX1250-GISEL-REAL16-NEXT:    s_endpgm
 ;
 ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_v:
@@ -137,7 +137,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_s(<2 x half> inreg %a, ptr addrspace(
 ; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_fp8_f16_s:
 ; GFX1250-SDAG-REAL16:       ; %bb.0:
 ; GFX1250-SDAG-REAL16-NEXT:    v_cvt_pk_fp8_f16 v2.l, s0
-; GFX1250-SDAG-REAL16-NEXT:    flat_store_b16 v[0:1], v2
+; GFX1250-SDAG-REAL16-NEXT:    global_store_b16 v[0:1], v2, off
 ; GFX1250-SDAG-REAL16-NEXT:    s_endpgm
 ;
 ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_s:
@@ -149,7 +149,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_s(<2 x half> inreg %a, ptr addrspace(
 ; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_fp8_f16_s:
 ; GFX1250-GISEL-REAL16:       ; %bb.0:
 ; GFX1250-GISEL-REAL16-NEXT:    v_cvt_pk_fp8_f16 v2.l, s0
-; GFX1250-GISEL-REAL16-NEXT:    flat_store_b16 v[0:1], v2
+; GFX1250-GISEL-REAL16-NEXT:    global_store_b16 v[0:1], v2, off
 ; GFX1250-GISEL-REAL16-NEXT:    s_endpgm
 ;
 ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_s:
@@ -166,7 +166,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_l(ptr addrspace(1) %out) {
 ; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_fp8_f16_l:
 ; GFX1250-SDAG-REAL16:       ; %bb.0:
 ; GFX1250-SDAG-REAL16-NEXT:    v_cvt_pk_fp8_f16 v2.l, 0x56400000
-; GFX1250-SDAG-REAL16-NEXT:    flat_store_b16 v[0:1], v2
+; GFX1250-SDAG-REAL16-NEXT:    global_store_b16 v[0:1], v2, off
 ; GFX1250-SDAG-REAL16-NEXT:    s_endpgm
 ;
 ; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_l:
@@ -178,7 +178,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_l(ptr addrspace(1) %out) {
 ; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_fp8_f16_l:
 ; GFX1250-GISEL-REAL16:       ; %bb.0:
 ; GFX1250-GISEL-REAL16-NEXT:    v_cvt_pk_fp8_f16 v2.l, 0x56400000
-; GFX1250-GISEL-REAL16-NEXT:    flat_store_b16 v[0:1], v2
+; GFX1250-GISEL-REAL16-NEXT:    global_store_b16 v[0:1], v2, off
 ; GFX1250-GISEL-REAL16-NEXT:    s_endpgm
 ;
 ; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_l:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.form.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.form.ll
index 87a7c2e..cc4cc8e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.form.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.form.ll
@@ -72,5 +72,206 @@ define <4 x float> @request_no_agpr(<8 x half> %arg0, <8 x half> %arg1, <4 x flo
   ret <4 x float> %result
 }
 
+; Make sure this selects the VGPR form when AGPRs are available, but there
+; are not enough of them.
+define amdgpu_kernel void @not_enough_agprs(ptr addrspace(1) %arg) #2 {
+; HEURRC-LABEL: not_enough_agprs:
+; HEURRC:       ; %bb.0: ; %bb
+; HEURRC-NEXT:    s_load_dwordx2 s[34:35], s[4:5], 0x24
+; HEURRC-NEXT:    v_mov_b32_e32 v33, 1.0
+; HEURRC-NEXT:    v_mov_b32_e32 v34, 2.0
+; HEURRC-NEXT:    v_mov_b32_e32 v32, 0
+; HEURRC-NEXT:    s_waitcnt lgkmcnt(0)
+; HEURRC-NEXT:    s_load_dwordx16 s[16:31], s[34:35], 0x0
+; HEURRC-NEXT:    s_load_dwordx16 s[0:15], s[34:35], 0x40
+; HEURRC-NEXT:    s_waitcnt lgkmcnt(0)
+; HEURRC-NEXT:    v_mov_b32_e32 v0, s16
+; HEURRC-NEXT:    v_mov_b32_e32 v1, s17
+; HEURRC-NEXT:    v_mov_b32_e32 v2, s18
+; HEURRC-NEXT:    v_mov_b32_e32 v3, s19
+; HEURRC-NEXT:    v_mov_b32_e32 v4, s20
+; HEURRC-NEXT:    v_mov_b32_e32 v5, s21
+; HEURRC-NEXT:    v_mov_b32_e32 v6, s22
+; HEURRC-NEXT:    v_mov_b32_e32 v7, s23
+; HEURRC-NEXT:    v_mov_b32_e32 v8, s24
+; HEURRC-NEXT:    v_mov_b32_e32 v9, s25
+; HEURRC-NEXT:    v_mov_b32_e32 v10, s26
+; HEURRC-NEXT:    v_mov_b32_e32 v11, s27
+; HEURRC-NEXT:    v_mov_b32_e32 v12, s28
+; HEURRC-NEXT:    v_mov_b32_e32 v13, s29
+; HEURRC-NEXT:    v_mov_b32_e32 v14, s30
+; HEURRC-NEXT:    v_mov_b32_e32 v15, s31
+; HEURRC-NEXT:    v_mov_b32_e32 v16, s0
+; HEURRC-NEXT:    v_mov_b32_e32 v17, s1
+; HEURRC-NEXT:    v_mov_b32_e32 v18, s2
+; HEURRC-NEXT:    v_mov_b32_e32 v19, s3
+; HEURRC-NEXT:    v_mov_b32_e32 v20, s4
+; HEURRC-NEXT:    v_mov_b32_e32 v21, s5
+; HEURRC-NEXT:    v_mov_b32_e32 v22, s6
+; HEURRC-NEXT:    v_mov_b32_e32 v23, s7
+; HEURRC-NEXT:    v_mov_b32_e32 v24, s8
+; HEURRC-NEXT:    v_mov_b32_e32 v25, s9
+; HEURRC-NEXT:    v_mov_b32_e32 v26, s10
+; HEURRC-NEXT:    v_mov_b32_e32 v27, s11
+; HEURRC-NEXT:    v_mov_b32_e32 v28, s12
+; HEURRC-NEXT:    v_mov_b32_e32 v29, s13
+; HEURRC-NEXT:    v_mov_b32_e32 v30, s14
+; HEURRC-NEXT:    v_mov_b32_e32 v31, s15
+; HEURRC-NEXT:    s_nop 1
+; HEURRC-NEXT:    v_mfma_f32_32x32x1_2b_f32 v[0:31], v33, v34, v[0:31] cbsz:1 abid:2 blgp:3
+; HEURRC-NEXT:    s_nop 15
+; HEURRC-NEXT:    s_nop 1
+; HEURRC-NEXT:    global_store_dwordx4 v32, v[24:27], s[34:35] offset:96
+; HEURRC-NEXT:    global_store_dwordx4 v32, v[28:31], s[34:35] offset:112
+; HEURRC-NEXT:    global_store_dwordx4 v32, v[16:19], s[34:35] offset:64
+; HEURRC-NEXT:    global_store_dwordx4 v32, v[20:23], s[34:35] offset:80
+; HEURRC-NEXT:    global_store_dwordx4 v32, v[8:11], s[34:35] offset:32
+; HEURRC-NEXT:    global_store_dwordx4 v32, v[12:15], s[34:35] offset:48
+; HEURRC-NEXT:    global_store_dwordx4 v32, v[0:3], s[34:35]
+; HEURRC-NEXT:    global_store_dwordx4 v32, v[4:7], s[34:35] offset:16
+; HEURRC-NEXT:    s_endpgm
+;
+; VGPRRC-LABEL: not_enough_agprs:
+; VGPRRC:       ; %bb.0: ; %bb
+; VGPRRC-NEXT:    s_load_dwordx2 s[34:35], s[4:5], 0x24
+; VGPRRC-NEXT:    v_mov_b32_e32 v33, 1.0
+; VGPRRC-NEXT:    v_mov_b32_e32 v34, 2.0
+; VGPRRC-NEXT:    v_mov_b32_e32 v32, 0
+; VGPRRC-NEXT:    s_waitcnt lgkmcnt(0)
+; VGPRRC-NEXT:    s_load_dwordx16 s[16:31], s[34:35], 0x0
+; VGPRRC-NEXT:    s_load_dwordx16 s[0:15], s[34:35], 0x40
+; VGPRRC-NEXT:    s_waitcnt lgkmcnt(0)
+; VGPRRC-NEXT:    v_mov_b32_e32 v0, s16
+; VGPRRC-NEXT:    v_mov_b32_e32 v1, s17
+; VGPRRC-NEXT:    v_mov_b32_e32 v2, s18
+; VGPRRC-NEXT:    v_mov_b32_e32 v3, s19
+; VGPRRC-NEXT:    v_mov_b32_e32 v4, s20
+; VGPRRC-NEXT:    v_mov_b32_e32 v5, s21
+; VGPRRC-NEXT:    v_mov_b32_e32 v6, s22
+; VGPRRC-NEXT:    v_mov_b32_e32 v7, s23
+; VGPRRC-NEXT:    v_mov_b32_e32 v8, s24
+; VGPRRC-NEXT:    v_mov_b32_e32 v9, s25
+; VGPRRC-NEXT:    v_mov_b32_e32 v10, s26
+; VGPRRC-NEXT:    v_mov_b32_e32 v11, s27
+; VGPRRC-NEXT:    v_mov_b32_e32 v12, s28
+; VGPRRC-NEXT:    v_mov_b32_e32 v13, s29
+; VGPRRC-NEXT:    v_mov_b32_e32 v14, s30
+; VGPRRC-NEXT:    v_mov_b32_e32 v15, s31
+; VGPRRC-NEXT:    v_mov_b32_e32 v16, s0
+; VGPRRC-NEXT:    v_mov_b32_e32 v17, s1
+; VGPRRC-NEXT:    v_mov_b32_e32 v18, s2
+; VGPRRC-NEXT:    v_mov_b32_e32 v19, s3
+; VGPRRC-NEXT:    v_mov_b32_e32 v20, s4
+; VGPRRC-NEXT:    v_mov_b32_e32 v21, s5
+; VGPRRC-NEXT:    v_mov_b32_e32 v22, s6
+; VGPRRC-NEXT:    v_mov_b32_e32 v23, s7
+; VGPRRC-NEXT:    v_mov_b32_e32 v24, s8
+; VGPRRC-NEXT:    v_mov_b32_e32 v25, s9
+; VGPRRC-NEXT:    v_mov_b32_e32 v26, s10
+; VGPRRC-NEXT:    v_mov_b32_e32 v27, s11
+; VGPRRC-NEXT:    v_mov_b32_e32 v28, s12
+; VGPRRC-NEXT:    v_mov_b32_e32 v29, s13
+; VGPRRC-NEXT:    v_mov_b32_e32 v30, s14
+; VGPRRC-NEXT:    v_mov_b32_e32 v31, s15
+; VGPRRC-NEXT:    s_nop 1
+; VGPRRC-NEXT:    v_mfma_f32_32x32x1_2b_f32 v[0:31], v33, v34, v[0:31] cbsz:1 abid:2 blgp:3
+; VGPRRC-NEXT:    s_nop 15
+; VGPRRC-NEXT:    s_nop 1
+; VGPRRC-NEXT:    global_store_dwordx4 v32, v[24:27], s[34:35] offset:96
+; VGPRRC-NEXT:    global_store_dwordx4 v32, v[28:31], s[34:35] offset:112
+; VGPRRC-NEXT:    global_store_dwordx4 v32, v[16:19], s[34:35] offset:64
+; VGPRRC-NEXT:    global_store_dwordx4 v32, v[20:23], s[34:35] offset:80
+; VGPRRC-NEXT:    global_store_dwordx4 v32, v[8:11], s[34:35] offset:32
+; VGPRRC-NEXT:    global_store_dwordx4 v32, v[12:15], s[34:35] offset:48
+; VGPRRC-NEXT:    global_store_dwordx4 v32, v[0:3], s[34:35]
+; VGPRRC-NEXT:    global_store_dwordx4 v32, v[4:7], s[34:35] offset:16
+; VGPRRC-NEXT:    s_endpgm
+bb:
+  %in.1 = load <32 x float>, ptr addrspace(1) %arg, align 128
+  %mai.1 = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.000000e+00, float 2.000000e+00, <32 x float> %in.1, i32 1, i32 2, i32 3)
+  store <32 x float> %mai.1, ptr addrspace(1) %arg, align 128
+  ret void
+}
+
+define <16 x float> @mfma_scale_respect_flag(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 %scale0, i32 %scale1) #2 {
+; HEURRC-LABEL: mfma_scale_respect_flag:
+; HEURRC:       ; %bb.0:
+; HEURRC-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HEURRC-NEXT:    scratch_load_dword a15, off, s32
+; HEURRC-NEXT:    scratch_load_dword v31, off, s32 offset:8
+; HEURRC-NEXT:    scratch_load_dword v32, off, s32 offset:4
+; HEURRC-NEXT:    v_accvgpr_write_b32 a0, v16
+; HEURRC-NEXT:    v_accvgpr_write_b32 a1, v17
+; HEURRC-NEXT:    v_accvgpr_write_b32 a2, v18
+; HEURRC-NEXT:    v_accvgpr_write_b32 a3, v19
+; HEURRC-NEXT:    v_accvgpr_write_b32 a4, v20
+; HEURRC-NEXT:    v_accvgpr_write_b32 a5, v21
+; HEURRC-NEXT:    v_accvgpr_write_b32 a6, v22
+; HEURRC-NEXT:    v_accvgpr_write_b32 a7, v23
+; HEURRC-NEXT:    v_accvgpr_write_b32 a8, v24
+; HEURRC-NEXT:    v_accvgpr_write_b32 a9, v25
+; HEURRC-NEXT:    v_accvgpr_write_b32 a10, v26
+; HEURRC-NEXT:    v_accvgpr_write_b32 a11, v27
+; HEURRC-NEXT:    v_accvgpr_write_b32 a12, v28
+; HEURRC-NEXT:    v_accvgpr_write_b32 a13, v29
+; HEURRC-NEXT:    v_accvgpr_write_b32 a14, v30
+; HEURRC-NEXT:    s_waitcnt vmcnt(0)
+; HEURRC-NEXT:    s_nop 0
+; HEURRC-NEXT:    v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v32, v31 op_sel_hi:[0,0,0]
+; HEURRC-NEXT:    s_nop 15
+; HEURRC-NEXT:    s_nop 3
+; HEURRC-NEXT:    v_accvgpr_read_b32 v0, a0
+; HEURRC-NEXT:    v_accvgpr_read_b32 v1, a1
+; HEURRC-NEXT:    v_accvgpr_read_b32 v2, a2
+; HEURRC-NEXT:    v_accvgpr_read_b32 v3, a3
+; HEURRC-NEXT:    v_accvgpr_read_b32 v4, a4
+; HEURRC-NEXT:    v_accvgpr_read_b32 v5, a5
+; HEURRC-NEXT:    v_accvgpr_read_b32 v6, a6
+; HEURRC-NEXT:    v_accvgpr_read_b32 v7, a7
+; HEURRC-NEXT:    v_accvgpr_read_b32 v8, a8
+; HEURRC-NEXT:    v_accvgpr_read_b32 v9, a9
+; HEURRC-NEXT:    v_accvgpr_read_b32 v10, a10
+; HEURRC-NEXT:    v_accvgpr_read_b32 v11, a11
+; HEURRC-NEXT:    v_accvgpr_read_b32 v12, a12
+; HEURRC-NEXT:    v_accvgpr_read_b32 v13, a13
+; HEURRC-NEXT:    v_accvgpr_read_b32 v14, a14
+; HEURRC-NEXT:    v_accvgpr_read_b32 v15, a15
+; HEURRC-NEXT:    s_setpc_b64 s[30:31]
+;
+; VGPRRC-LABEL: mfma_scale_respect_flag:
+; VGPRRC:       ; %bb.0:
+; VGPRRC-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VGPRRC-NEXT:    scratch_load_dword v31, off, s32
+; VGPRRC-NEXT:    scratch_load_dword v32, off, s32 offset:8
+; VGPRRC-NEXT:    scratch_load_dword v33, off, s32 offset:4
+; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
+; VGPRRC-NEXT:    v_mfma_scale_f32_32x32x64_f8f6f4 v[16:31], v[0:7], v[8:15], v[16:31], v33, v32 op_sel_hi:[0,0,0]
+; VGPRRC-NEXT:    s_nop 15
+; VGPRRC-NEXT:    s_nop 3
+; VGPRRC-NEXT:    v_mov_b32_e32 v0, v16
+; VGPRRC-NEXT:    v_mov_b32_e32 v1, v17
+; VGPRRC-NEXT:    v_mov_b32_e32 v2, v18
+; VGPRRC-NEXT:    v_mov_b32_e32 v3, v19
+; VGPRRC-NEXT:    v_mov_b32_e32 v4, v20
+; VGPRRC-NEXT:    v_mov_b32_e32 v5, v21
+; VGPRRC-NEXT:    v_mov_b32_e32 v6, v22
+; VGPRRC-NEXT:    v_mov_b32_e32 v7, v23
+; VGPRRC-NEXT:    v_mov_b32_e32 v8, v24
+; VGPRRC-NEXT:    v_mov_b32_e32 v9, v25
+; VGPRRC-NEXT:    v_mov_b32_e32 v10, v26
+; VGPRRC-NEXT:    v_mov_b32_e32 v11, v27
+; VGPRRC-NEXT:    v_mov_b32_e32 v12, v28
+; VGPRRC-NEXT:    v_mov_b32_e32 v13, v29
+; VGPRRC-NEXT:    v_mov_b32_e32 v14, v30
+; VGPRRC-NEXT:    v_mov_b32_e32 v15, v31
+; VGPRRC-NEXT:    s_setpc_b64 s[30:31]
+  %result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2,
+                                                                                      i32 0, ; cbsz
+                                                                                      i32 0, ; blgp
+                                                                                      i32 0, i32 %scale0, i32 0, i32 %scale1)
+  ret <16 x float> %result
+}
+
 attributes #0 = { "amdgpu-agpr-alloc"="32,256" }
 attributes #1 = { "amdgpu-agpr-alloc"="0,0" }
+attributes #2 = { nounwind "amdgpu-agpr-alloc"="20" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
index 5ab8706..22bc62a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
@@ -726,12 +726,12 @@ define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double
 ; GFX90A-VGPR-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX90A-VGPR-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX90A-VGPR-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
-; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[2:3], s[6:7], s[6:7] op_sel:[0,1]
+; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
+; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[4:5], s[6:7], s[6:7] op_sel:[0,1]
 ; GFX90A-VGPR-NEXT:    s_nop 1
-; GFX90A-VGPR-NEXT:    v_mfma_f64_4x4x4f64 v[4:5], v[0:1], v[2:3], 0
+; GFX90A-VGPR-NEXT:    v_mfma_f64_4x4x4f64 v[0:1], v[2:3], v[4:5], 0
 ; GFX90A-VGPR-NEXT:    s_nop 3
-; GFX90A-VGPR-NEXT:    v_mfma_f64_4x4x4f64 v[0:1], v[0:1], v[2:3], v[4:5] cbsz:1 abid:2 blgp:3
+; GFX90A-VGPR-NEXT:    v_mfma_f64_4x4x4f64 v[0:1], v[2:3], v[4:5], v[0:1] cbsz:1 abid:2 blgp:3
 ; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX90A-VGPR-NEXT:    s_nop 7
 ; GFX90A-VGPR-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -742,12 +742,12 @@ define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double
 ; GFX942-VGPR-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
 ; GFX942-VGPR-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX942-VGPR-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[0:1], s[2:3]
-; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[2:3], s[6:7]
+; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[4:5], s[6:7]
 ; GFX942-VGPR-NEXT:    s_nop 1
-; GFX942-VGPR-NEXT:    v_mfma_f64_4x4x4_4b_f64 v[4:5], v[0:1], v[2:3], 0
+; GFX942-VGPR-NEXT:    v_mfma_f64_4x4x4_4b_f64 v[0:1], v[2:3], v[4:5], 0
 ; GFX942-VGPR-NEXT:    s_nop 3
-; GFX942-VGPR-NEXT:    v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[4:5] cbsz:1 abid:2 neg:[1,1,0]
+; GFX942-VGPR-NEXT:    v_mfma_f64_4x4x4_4b_f64 v[0:1], v[2:3], v[4:5], v[0:1] cbsz:1 abid:2 neg:[1,1,0]
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX942-VGPR-NEXT:    s_nop 7
 ; GFX942-VGPR-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -765,10 +765,10 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
 ; GFX90A-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
 ; GFX90A-NEXT:    s_load_dwordx2 s[12:13], s[4:5], 0x34
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
+; GFX90A-NEXT:    v_mov_b32_e32 v0, s10
 ; GFX90A-NEXT:    s_load_dwordx8 s[0:7], s[8:9], 0x0
-; GFX90A-NEXT:    v_mov_b32_e32 v3, s11
-; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[12:13], s[12:13] op_sel:[0,1]
+; GFX90A-NEXT:    v_mov_b32_e32 v1, s11
+; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[12:13], s[12:13] op_sel:[0,1]
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    v_accvgpr_write_b32 a0, s0
 ; GFX90A-NEXT:    v_accvgpr_write_b32 a1, s1
@@ -779,7 +779,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
 ; GFX90A-NEXT:    v_accvgpr_write_b32 a6, s6
 ; GFX90A-NEXT:    v_accvgpr_write_b32 a7, s7
 ; GFX90A-NEXT:    s_nop 1
-; GFX90A-NEXT:    v_mfma_f64_16x16x4f64 a[0:7], v[2:3], v[0:1], a[0:7] cbsz:1 abid:2 blgp:3
+; GFX90A-NEXT:    v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] cbsz:1 abid:2 blgp:3
 ; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX90A-NEXT:    s_nop 15
 ; GFX90A-NEXT:    s_nop 0
@@ -792,10 +792,10 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
 ; GFX942-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
 ; GFX942-NEXT:    s_load_dwordx2 s[12:13], s[4:5], 0x34
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX942-NEXT:    v_mov_b32_e32 v2, s10
+; GFX942-NEXT:    v_mov_b32_e32 v0, s10
 ; GFX942-NEXT:    s_load_dwordx8 s[0:7], s[8:9], 0x0
-; GFX942-NEXT:    v_mov_b32_e32 v3, s11
-; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[12:13]
+; GFX942-NEXT:    v_mov_b32_e32 v1, s11
+; GFX942-NEXT:    v_mov_b64_e32 v[2:3], s[12:13]
 ; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-NEXT:    v_accvgpr_write_b32 a0, s0
 ; GFX942-NEXT:    v_accvgpr_write_b32 a1, s1
@@ -806,7 +806,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
 ; GFX942-NEXT:    v_accvgpr_write_b32 a6, s6
 ; GFX942-NEXT:    v_accvgpr_write_b32 a7, s7
 ; GFX942-NEXT:    s_nop 1
-; GFX942-NEXT:    v_mfma_f64_16x16x4_f64 a[0:7], v[2:3], v[0:1], a[0:7] cbsz:1 abid:2 neg:[1,1,0]
+; GFX942-NEXT:    v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] cbsz:1 abid:2 neg:[1,1,0]
 ; GFX942-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX942-NEXT:    s_nop 15
 ; GFX942-NEXT:    s_nop 0
@@ -819,17 +819,17 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
 ; GFX90A-VGPR-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
 ; GFX90A-VGPR-NEXT:    s_load_dwordx2 s[12:13], s[4:5], 0x34
 ; GFX90A-VGPR-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v10, s10
+; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v8, s10
 ; GFX90A-VGPR-NEXT:    s_load_dwordx8 s[0:7], s[8:9], 0x0
-; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v11, s11
-; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[8:9], s[12:13], s[12:13] op_sel:[0,1]
+; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v9, s11
+; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[10:11], s[12:13], s[12:13] op_sel:[0,1]
 ; GFX90A-VGPR-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
 ; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
 ; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[4:5], s[4:5], s[4:5] op_sel:[0,1]
 ; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[6:7], s[6:7], s[6:7] op_sel:[0,1]
 ; GFX90A-VGPR-NEXT:    s_nop 1
-; GFX90A-VGPR-NEXT:    v_mfma_f64_16x16x4f64 v[0:7], v[10:11], v[8:9], v[0:7] cbsz:1 abid:2 blgp:3
+; GFX90A-VGPR-NEXT:    v_mfma_f64_16x16x4f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 blgp:3
 ; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v8, 0
 ; GFX90A-VGPR-NEXT:    s_nop 15
 ; GFX90A-VGPR-NEXT:    s_nop 0
@@ -842,17 +842,17 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
 ; GFX942-VGPR-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
 ; GFX942-VGPR-NEXT:    s_load_dwordx2 s[12:13], s[4:5], 0x34
 ; GFX942-VGPR-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX942-VGPR-NEXT:    v_mov_b32_e32 v10, s10
+; GFX942-VGPR-NEXT:    v_mov_b32_e32 v8, s10
 ; GFX942-VGPR-NEXT:    s_load_dwordx8 s[0:7], s[8:9], 0x0
-; GFX942-VGPR-NEXT:    v_mov_b32_e32 v11, s11
-; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[8:9], s[12:13]
+; GFX942-VGPR-NEXT:    v_mov_b32_e32 v9, s11
+; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[10:11], s[12:13]
 ; GFX942-VGPR-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
 ; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
 ; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[4:5], s[4:5]
 ; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[6:7], s[6:7]
 ; GFX942-VGPR-NEXT:    s_nop 1
-; GFX942-VGPR-NEXT:    v_mfma_f64_16x16x4_f64 v[0:7], v[10:11], v[8:9], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
+; GFX942-VGPR-NEXT:    v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v8, 0
 ; GFX942-VGPR-NEXT:    s_nop 15
 ; GFX942-VGPR-NEXT:    s_nop 0
@@ -1629,20 +1629,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
 ; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v7, 0x3ff00000
 ; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX90A-VGPR-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v12, s2
-; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v13, s3
+; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v10, s2
+; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v11, s3
 ; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v4, v0
 ; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v5, v0
 ; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v6, v0
 ; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v1, v0
 ; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
-; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
+; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1]
 ; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
 ; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
 ; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
 ; GFX90A-VGPR-NEXT:    s_nop 1
-; GFX90A-VGPR-NEXT:    v_mfma_f64_16x16x4f64 v[2:9], v[12:13], v[10:11], v[2:9]
+; GFX90A-VGPR-NEXT:    v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9]
 ; GFX90A-VGPR-NEXT:    s_nop 15
 ; GFX90A-VGPR-NEXT:    s_nop 1
 ; GFX90A-VGPR-NEXT:    global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
@@ -1657,20 +1657,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v7, 0x3ff00000
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX942-VGPR-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX942-VGPR-NEXT:    v_mov_b32_e32 v12, s2
-; GFX942-VGPR-NEXT:    v_mov_b32_e32 v13, s3
+; GFX942-VGPR-NEXT:    v_mov_b32_e32 v10, s2
+; GFX942-VGPR-NEXT:    v_mov_b32_e32 v11, s3
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v3, v0
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v4, v0
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v5, v0
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v6, v0
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v1, v0
 ; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[8:9], v[6:7]
-; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[10:11], s[6:7]
+; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[12:13], s[6:7]
 ; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[6:7], v[4:5]
 ; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[4:5], v[2:3]
 ; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[2:3], v[0:1]
 ; GFX942-VGPR-NEXT:    s_nop 1
-; GFX942-VGPR-NEXT:    v_mfma_f64_16x16x4_f64 v[2:9], v[12:13], v[10:11], v[2:9]
+; GFX942-VGPR-NEXT:    v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9]
 ; GFX942-VGPR-NEXT:    s_nop 15
 ; GFX942-VGPR-NEXT:    s_nop 1
 ; GFX942-VGPR-NEXT:    global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
@@ -1743,20 +1743,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %
 ; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v1, 0x405ec000
 ; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX90A-VGPR-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v12, s2
-; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v13, s3
+; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v10, s2
+; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v11, s3
 ; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v3, v1
 ; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v4, v0
 ; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v5, v1
 ; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v6, v0
 ; GFX90A-VGPR-NEXT:    v_mov_b32_e32 v7, v1
 ; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
-; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
+; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[12:13], s[6:7], s[6:7] op_sel:[0,1]
 ; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
 ; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
 ; GFX90A-VGPR-NEXT:    v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
 ; GFX90A-VGPR-NEXT:    s_nop 1
-; GFX90A-VGPR-NEXT:    v_mfma_f64_16x16x4f64 v[2:9], v[12:13], v[10:11], v[2:9]
+; GFX90A-VGPR-NEXT:    v_mfma_f64_16x16x4f64 v[2:9], v[10:11], v[12:13], v[2:9]
 ; GFX90A-VGPR-NEXT:    s_nop 15
 ; GFX90A-VGPR-NEXT:    s_nop 1
 ; GFX90A-VGPR-NEXT:    global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
@@ -1771,20 +1771,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v1, 0x405ec000
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX942-VGPR-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX942-VGPR-NEXT:    v_mov_b32_e32 v12, s2
-; GFX942-VGPR-NEXT:    v_mov_b32_e32 v13, s3
+; GFX942-VGPR-NEXT:    v_mov_b32_e32 v10, s2
+; GFX942-VGPR-NEXT:    v_mov_b32_e32 v11, s3
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v3, v1
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v4, v0
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v5, v1
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v6, v0
 ; GFX942-VGPR-NEXT:    v_mov_b32_e32 v7, v1
 ; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[8:9], v[6:7]
-; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[10:11], s[6:7]
+; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[12:13], s[6:7]
 ; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[6:7], v[4:5]
 ; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[4:5], v[2:3]
 ; GFX942-VGPR-NEXT:    v_mov_b64_e32 v[2:3], v[0:1]
 ; GFX942-VGPR-NEXT:    s_nop 1
-; GFX942-VGPR-NEXT:    v_mfma_f64_16x16x4_f64 v[2:9], v[12:13], v[10:11], v[2:9]
+; GFX942-VGPR-NEXT:    v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9]
 ; GFX942-VGPR-NEXT:    s_nop 15
 ; GFX942-VGPR-NEXT:    s_nop 1
 ; GFX942-VGPR-NEXT:    global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll
index dc4c9291..2fb677e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll
@@ -1445,20 +1445,20 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x32_f16(ptr addrspace(1) %arg, <
 ; GFX942-SDAG:       ; %bb.0: ; %bb
 ; GFX942-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
 ; GFX942-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x44
-; GFX942-SDAG-NEXT:    v_mov_b32_e32 v6, 0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
-; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[10:11]
-; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[12:13]
-; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[14:15]
-; GFX942-SDAG-NEXT:    v_mov_b32_e32 v7, s6
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[10:11], s[10:11]
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[12:13]
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[14:15]
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, s6
 ; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[8:9], s[2:3]
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[0:1]
 ; GFX942-SDAG-NEXT:    s_nop 1
-; GFX942-SDAG-NEXT:    v_smfmac_f32_16x16x32_f16 v[8:11], v[4:5], v[0:3], v7 cbsz:1 abid:2
+; GFX942-SDAG-NEXT:    v_smfmac_f32_16x16x32_f16 v[6:9], v[10:11], v[2:5], v1 cbsz:1 abid:2
 ; GFX942-SDAG-NEXT:    s_nop 6
-; GFX942-SDAG-NEXT:    global_store_dwordx4 v6, v[8:11], s[8:9]
+; GFX942-SDAG-NEXT:    global_store_dwordx4 v0, v[6:9], s[8:9]
 ; GFX942-SDAG-NEXT:    s_endpgm
 ;
 ; GFX942-GISEL-LABEL: test_smfmac_f32_16x16x32_f16:
@@ -1485,20 +1485,20 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x32_f16(ptr addrspace(1) %arg, <
 ; GFX950-SDAG:       ; %bb.0: ; %bb
 ; GFX950-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
 ; GFX950-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x44
-; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, 0
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX950-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
-; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[10:11]
-; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[12:13]
-; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[14:15]
-; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, s6
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[10:11], s[10:11]
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[12:13]
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[14:15]
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v1, s6
 ; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[8:9], s[2:3]
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[0:1]
 ; GFX950-SDAG-NEXT:    s_nop 1
-; GFX950-SDAG-NEXT:    v_smfmac_f32_16x16x32_f16 v[8:11], v[4:5], v[0:3], v7 cbsz:1 abid:2
+; GFX950-SDAG-NEXT:    v_smfmac_f32_16x16x32_f16 v[6:9], v[10:11], v[2:5], v1 cbsz:1 abid:2
 ; GFX950-SDAG-NEXT:    s_nop 7
-; GFX950-SDAG-NEXT:    global_store_dwordx4 v6, v[8:11], s[8:9]
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v0, v[6:9], s[8:9]
 ; GFX950-SDAG-NEXT:    s_endpgm
 ;
 ; GFX950-GISEL-LABEL: test_smfmac_f32_16x16x32_f16:
@@ -1577,11 +1577,11 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x16_f16(ptr addrspace(1) %arg, <
 ; GFX942-SDAG-NEXT:    s_load_dwordx8 s[16:23], s[4:5], 0x24
 ; GFX942-SDAG-NEXT:    s_load_dword s24, s[4:5], 0x44
 ; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[20:21], s[18:19]
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[22:23], s[18:19]
 ; GFX942-SDAG-NEXT:    s_load_dwordx16 s[0:15], s[16:17], 0x0
-; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[16:17], s[20:21]
-; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[18:19], s[22:23]
-; GFX942-SDAG-NEXT:    v_mov_b32_e32 v22, s24
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[18:19], s[20:21]
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[20:21], s[22:23]
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v16, s24
 ; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
 ; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
@@ -1592,7 +1592,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x16_f16(ptr addrspace(1) %arg, <
 ; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[12:13], s[12:13]
 ; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[14:15], s[14:15]
 ; GFX942-SDAG-NEXT:    s_nop 1
-; GFX942-SDAG-NEXT:    v_smfmac_f32_32x32x16_f16 v[0:15], v[20:21], v[16:19], v22 cbsz:1 abid:2
+; GFX942-SDAG-NEXT:    v_smfmac_f32_32x32x16_f16 v[0:15], v[22:23], v[18:21], v16 cbsz:1 abid:2
 ; GFX942-SDAG-NEXT:    v_mov_b32_e32 v16, 0
 ; GFX942-SDAG-NEXT:    s_nop 9
 ; GFX942-SDAG-NEXT:    global_store_dwordx4 v16, v[12:15], s[16:17] offset:48
@@ -1635,11 +1635,11 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x16_f16(ptr addrspace(1) %arg, <
 ; GFX950-SDAG-NEXT:    s_load_dwordx8 s[16:23], s[4:5], 0x24
 ; GFX950-SDAG-NEXT:    s_load_dword s24, s[4:5], 0x44
 ; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[20:21], s[18:19]
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[22:23], s[18:19]
 ; GFX950-SDAG-NEXT:    s_load_dwordx16 s[0:15], s[16:17], 0x0
-; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[16:17], s[20:21]
-; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[18:19], s[22:23]
-; GFX950-SDAG-NEXT:    v_mov_b32_e32 v22, s24
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[18:19], s[20:21]
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[20:21], s[22:23]
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, s24
 ; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
 ; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
@@ -1650,7 +1650,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x16_f16(ptr addrspace(1) %arg, <
 ; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[12:13], s[12:13]
 ; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[14:15], s[14:15]
 ; GFX950-SDAG-NEXT:    s_nop 1
-; GFX950-SDAG-NEXT:    v_smfmac_f32_32x32x16_f16 v[0:15], v[20:21], v[16:19], v22 cbsz:1 abid:2
+; GFX950-SDAG-NEXT:    v_smfmac_f32_32x32x16_f16 v[0:15], v[22:23], v[18:21], v16 cbsz:1 abid:2
 ; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, 0
 ; GFX950-SDAG-NEXT:    s_nop 10
 ; GFX950-SDAG-NEXT:    global_store_dwordx4 v16, v[12:15], s[16:17] offset:48
@@ -1847,20 +1847,20 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x32_bf16(ptr addrspace(1) %arg,
 ; GFX942-SDAG:       ; %bb.0: ; %bb
 ; GFX942-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
 ; GFX942-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x44
-; GFX942-SDAG-NEXT:    v_mov_b32_e32 v6, 0
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
-; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[10:11]
-; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[12:13]
-; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[14:15]
-; GFX942-SDAG-NEXT:    v_mov_b32_e32 v7, s6
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[10:11], s[10:11]
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[12:13]
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[14:15]
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v1, s6
 ; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[8:9], s[2:3]
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[0:1]
 ; GFX942-SDAG-NEXT:    s_nop 1
-; GFX942-SDAG-NEXT:    v_smfmac_f32_16x16x32_bf16 v[8:11], v[4:5], v[0:3], v7 cbsz:1 abid:2
+; GFX942-SDAG-NEXT:    v_smfmac_f32_16x16x32_bf16 v[6:9], v[10:11], v[2:5], v1 cbsz:1 abid:2
 ; GFX942-SDAG-NEXT:    s_nop 6
-; GFX942-SDAG-NEXT:    global_store_dwordx4 v6, v[8:11], s[8:9]
+; GFX942-SDAG-NEXT:    global_store_dwordx4 v0, v[6:9], s[8:9]
 ; GFX942-SDAG-NEXT:    s_endpgm
 ;
 ; GFX942-GISEL-LABEL: test_smfmac_f32_16x16x32_bf16:
@@ -1887,20 +1887,20 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x32_bf16(ptr addrspace(1) %arg,
 ; GFX950-SDAG:       ; %bb.0: ; %bb
 ; GFX950-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
 ; GFX950-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x44
-; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, 0
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX950-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
-; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[10:11]
-; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[12:13]
-; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[14:15]
-; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, s6
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[10:11], s[10:11]
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[12:13]
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[14:15]
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v1, s6
 ; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[8:9], s[2:3]
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[0:1]
 ; GFX950-SDAG-NEXT:    s_nop 1
-; GFX950-SDAG-NEXT:    v_smfmac_f32_16x16x32_bf16 v[8:11], v[4:5], v[0:3], v7 cbsz:1 abid:2
+; GFX950-SDAG-NEXT:    v_smfmac_f32_16x16x32_bf16 v[6:9], v[10:11], v[2:5], v1 cbsz:1 abid:2
 ; GFX950-SDAG-NEXT:    s_nop 7
-; GFX950-SDAG-NEXT:    global_store_dwordx4 v6, v[8:11], s[8:9]
+; GFX950-SDAG-NEXT:    global_store_dwordx4 v0, v[6:9], s[8:9]
 ; GFX950-SDAG-NEXT:    s_endpgm
 ;
 ; GFX950-GISEL-LABEL: test_smfmac_f32_16x16x32_bf16:
@@ -1979,11 +1979,11 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x16_bf16(ptr addrspace(1) %arg,
 ; GFX942-SDAG-NEXT:    s_load_dwordx8 s[16:23], s[4:5], 0x24
 ; GFX942-SDAG-NEXT:    s_load_dword s24, s[4:5], 0x44
 ; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[20:21], s[18:19]
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[22:23], s[18:19]
 ; GFX942-SDAG-NEXT:    s_load_dwordx16 s[0:15], s[16:17], 0x0
-; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[16:17], s[20:21]
-; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[18:19], s[22:23]
-; GFX942-SDAG-NEXT:    v_mov_b32_e32 v22, s24
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[18:19], s[20:21]
+; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[20:21], s[22:23]
+; GFX942-SDAG-NEXT:    v_mov_b32_e32 v16, s24
 ; GFX942-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
 ; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
@@ -1994,7 +1994,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x16_bf16(ptr addrspace(1) %arg,
 ; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[12:13], s[12:13]
 ; GFX942-SDAG-NEXT:    v_mov_b64_e32 v[14:15], s[14:15]
 ; GFX942-SDAG-NEXT:    s_nop 1
-; GFX942-SDAG-NEXT:    v_smfmac_f32_32x32x16_bf16 v[0:15], v[20:21], v[16:19], v22 cbsz:1 abid:2
+; GFX942-SDAG-NEXT:    v_smfmac_f32_32x32x16_bf16 v[0:15], v[22:23], v[18:21], v16 cbsz:1 abid:2
 ; GFX942-SDAG-NEXT:    v_mov_b32_e32 v16, 0
 ; GFX942-SDAG-NEXT:    s_nop 9
 ; GFX942-SDAG-NEXT:    global_store_dwordx4 v16, v[12:15], s[16:17] offset:48
@@ -2037,11 +2037,11 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x16_bf16(ptr addrspace(1) %arg,
 ; GFX950-SDAG-NEXT:    s_load_dwordx8 s[16:23], s[4:5], 0x24
 ; GFX950-SDAG-NEXT:    s_load_dword s24, s[4:5], 0x44
 ; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[20:21], s[18:19]
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[22:23], s[18:19]
 ; GFX950-SDAG-NEXT:    s_load_dwordx16 s[0:15], s[16:17], 0x0
-; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[16:17], s[20:21]
-; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[18:19], s[22:23]
-; GFX950-SDAG-NEXT:    v_mov_b32_e32 v22, s24
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[18:19], s[20:21]
+; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[20:21], s[22:23]
+; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, s24
 ; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
 ; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
@@ -2052,7 +2052,7 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x16_bf16(ptr addrspace(1) %arg,
 ; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[12:13], s[12:13]
 ; GFX950-SDAG-NEXT:    v_mov_b64_e32 v[14:15], s[14:15]
 ; GFX950-SDAG-NEXT:    s_nop 1
-; GFX950-SDAG-NEXT:    v_smfmac_f32_32x32x16_bf16 v[0:15], v[20:21], v[16:19], v22 cbsz:1 abid:2
+; GFX950-SDAG-NEXT:    v_smfmac_f32_32x32x16_bf16 v[0:15], v[22:23], v[18:21], v16 cbsz:1 abid:2
 ; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, 0
 ; GFX950-SDAG-NEXT:    s_nop 10
 ; GFX950-SDAG-NEXT:    global_store_dwordx4 v16, v[12:15], s[16:17] offset:48
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll
index 033a35f..13a96cf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll
@@ -15,15 +15,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16(<8 x bfloat> %arg0, <8 x
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_load_dwordx8 s[24:31], s[4:5], 0x24
 ; GCN-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
-; GCN-NEXT:    v_mov_b64_e32 v[8:9], 48
-; GCN-NEXT:    v_mov_b64_e32 v[10:11], 32
-; GCN-NEXT:    v_mov_b64_e32 v[12:13], 16
+; GCN-NEXT:    v_mov_b64_e32 v[0:1], 48
+; GCN-NEXT:    v_mov_b64_e32 v[2:3], 32
+; GCN-NEXT:    v_mov_b64_e32 v[4:5], 16
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_mov_b64_e32 v[0:1], s[24:25]
-; GCN-NEXT:    v_mov_b64_e32 v[2:3], s[26:27]
-; GCN-NEXT:    v_mov_b64_e32 v[4:5], s[28:29]
+; GCN-NEXT:    v_mov_b64_e32 v[8:9], s[24:25]
+; GCN-NEXT:    v_mov_b64_e32 v[10:11], s[26:27]
+; GCN-NEXT:    v_mov_b64_e32 v[12:13], s[28:29]
 ; GCN-NEXT:    v_accvgpr_write_b32 a0, s8
-; GCN-NEXT:    v_mov_b64_e32 v[6:7], s[30:31]
+; GCN-NEXT:    v_mov_b64_e32 v[14:15], s[30:31]
 ; GCN-NEXT:    v_accvgpr_write_b32 a1, s9
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, s10
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, s11
@@ -41,40 +41,39 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16(<8 x bfloat> %arg0, <8 x
 ; GCN-NEXT:    v_accvgpr_write_b32 a15, s23
 ; GCN-NEXT:    v_mov_b32_e32 v16, s16
 ; GCN-NEXT:    v_mov_b32_e32 v17, s17
-; GCN-NEXT:    v_mfma_f32_32x32x16_bf16 a[16:31], v[0:3], v[4:7], a[0:15]
+; GCN-NEXT:    v_mfma_f32_32x32x16_bf16 a[16:31], v[8:11], v[12:15], a[0:15]
 ; GCN-NEXT:    v_mov_b32_e32 v18, s18
 ; GCN-NEXT:    v_mov_b32_e32 v19, s19
-; GCN-NEXT:    v_mov_b32_e32 v0, s20
-; GCN-NEXT:    v_mov_b32_e32 v1, s21
-; GCN-NEXT:    v_mov_b32_e32 v2, s22
-; GCN-NEXT:    v_mov_b32_e32 v3, s23
-; GCN-NEXT:    v_mov_b64_e32 v[14:15], 0
+; GCN-NEXT:    v_mov_b32_e32 v8, s20
+; GCN-NEXT:    v_mov_b32_e32 v9, s21
+; GCN-NEXT:    v_mov_b32_e32 v10, s22
+; GCN-NEXT:    v_mov_b32_e32 v11, s23
+; GCN-NEXT:    v_mov_b64_e32 v[6:7], 0
 ; GCN-NEXT:    s_nop 4
-; GCN-NEXT:    global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v[0:1], a[28:31], off sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v[2:3], a[24:27], off sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v[12:13], a[20:23], off sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v[4:5], a[20:23], off sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v[14:15], a[16:19], off sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v[6:7], a[16:19], off sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v[10:11], v[16:19], off sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v[2:3], v[16:19], off sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_nop 0
 ; GCN-NEXT:    v_mov_b32_e32 v0, s8
 ; GCN-NEXT:    v_mov_b32_e32 v1, s9
 ; GCN-NEXT:    v_mov_b32_e32 v2, s10
 ; GCN-NEXT:    v_mov_b32_e32 v3, s11
-; GCN-NEXT:    global_store_dwordx4 v[14:15], v[0:3], off sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_nop 0
 ; GCN-NEXT:    v_mov_b32_e32 v0, s12
 ; GCN-NEXT:    v_mov_b32_e32 v1, s13
 ; GCN-NEXT:    v_mov_b32_e32 v2, s14
 ; GCN-NEXT:    v_mov_b32_e32 v3, s15
-; GCN-NEXT:    global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_endpgm
   %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0)
@@ -88,15 +87,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__flags(<8 x bfloat> %arg0
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_load_dwordx8 s[24:31], s[4:5], 0x24
 ; GCN-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
-; GCN-NEXT:    v_mov_b64_e32 v[8:9], 48
-; GCN-NEXT:    v_mov_b64_e32 v[10:11], 32
-; GCN-NEXT:    v_mov_b64_e32 v[12:13], 16
+; GCN-NEXT:    v_mov_b64_e32 v[0:1], 48
+; GCN-NEXT:    v_mov_b64_e32 v[2:3], 32
+; GCN-NEXT:    v_mov_b64_e32 v[4:5], 16
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_mov_b64_e32 v[0:1], s[24:25]
-; GCN-NEXT:    v_mov_b64_e32 v[2:3], s[26:27]
-; GCN-NEXT:    v_mov_b64_e32 v[4:5], s[28:29]
+; GCN-NEXT:    v_mov_b64_e32 v[8:9], s[24:25]
+; GCN-NEXT:    v_mov_b64_e32 v[10:11], s[26:27]
+; GCN-NEXT:    v_mov_b64_e32 v[12:13], s[28:29]
 ; GCN-NEXT:    v_accvgpr_write_b32 a0, s8
-; GCN-NEXT:    v_mov_b64_e32 v[6:7], s[30:31]
+; GCN-NEXT:    v_mov_b64_e32 v[14:15], s[30:31]
 ; GCN-NEXT:    v_accvgpr_write_b32 a1, s9
 ; GCN-NEXT:    v_accvgpr_write_b32 a2, s10
 ; GCN-NEXT:    v_accvgpr_write_b32 a3, s11
@@ -114,40 +113,39 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__flags(<8 x bfloat> %arg0
 ; GCN-NEXT:    v_accvgpr_write_b32 a15, s23
 ; GCN-NEXT:    v_mov_b32_e32 v16, s16
 ; GCN-NEXT:    v_mov_b32_e32 v17, s17
-; GCN-NEXT:    v_mfma_f32_32x32x16_bf16 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:2 abid:3 blgp:1
+; GCN-NEXT:    v_mfma_f32_32x32x16_bf16 a[16:31], v[8:11], v[12:15], a[0:15] cbsz:2 abid:3 blgp:1
 ; GCN-NEXT:    v_mov_b32_e32 v18, s18
 ; GCN-NEXT:    v_mov_b32_e32 v19, s19
-; GCN-NEXT:    v_mov_b32_e32 v0, s20
-; GCN-NEXT:    v_mov_b32_e32 v1, s21
-; GCN-NEXT:    v_mov_b32_e32 v2, s22
-; GCN-NEXT:    v_mov_b32_e32 v3, s23
-; GCN-NEXT:    v_mov_b64_e32 v[14:15], 0
+; GCN-NEXT:    v_mov_b32_e32 v8, s20
+; GCN-NEXT:    v_mov_b32_e32 v9, s21
+; GCN-NEXT:    v_mov_b32_e32 v10, s22
+; GCN-NEXT:    v_mov_b32_e32 v11, s23
+; GCN-NEXT:    v_mov_b64_e32 v[6:7], 0
 ; GCN-NEXT:    s_nop 4
-; GCN-NEXT:    global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v[0:1], a[28:31], off sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v[2:3], a[24:27], off sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v[12:13], a[20:23], off sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v[4:5], a[20:23], off sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v[14:15], a[16:19], off sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v[6:7], a[16:19], off sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v[10:11], v[16:19], off sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v[2:3], v[16:19], off sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_nop 0
 ; GCN-NEXT:    v_mov_b32_e32 v0, s8
 ; GCN-NEXT:    v_mov_b32_e32 v1, s9
 ; GCN-NEXT:    v_mov_b32_e32 v2, s10
 ; GCN-NEXT:    v_mov_b32_e32 v3, s11
-; GCN-NEXT:    global_store_dwordx4 v[14:15], v[0:3], off sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_nop 0
 ; GCN-NEXT:    v_mov_b32_e32 v0, s12
 ; GCN-NEXT:    v_mov_b32_e32 v1, s13
 ; GCN-NEXT:    v_mov_b32_e32 v2, s14
 ; GCN-NEXT:    v_mov_b32_e32 v3, s15
-; GCN-NEXT:    global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_endpgm
   %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <16 x float> %arg2, i32 2, i32 3, i32 1)
@@ -250,13 +248,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd(<8 x bfloat> %arg
 ; GCN-NEXT:    s_load_dwordx8 s[24:31], s[4:5], 0x24
 ; GCN-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
 ; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; GCN-NEXT:    v_mov_b32_e32 v44, 0
+; GCN-NEXT:    v_mov_b32_e32 v36, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_mov_b64_e32 v[34:35], s[26:27]
-; GCN-NEXT:    v_mov_b64_e32 v[32:33], s[24:25]
-; GCN-NEXT:    v_mov_b64_e32 v[38:39], s[30:31]
+; GCN-NEXT:    v_mov_b64_e32 v[40:41], s[26:27]
+; GCN-NEXT:    v_mov_b64_e32 v[38:39], s[24:25]
+; GCN-NEXT:    v_mov_b64_e32 v[44:45], s[30:31]
 ; GCN-NEXT:    v_mov_b64_e32 v[30:31], s[22:23]
-; GCN-NEXT:    v_mov_b64_e32 v[36:37], s[28:29]
+; GCN-NEXT:    v_mov_b64_e32 v[42:43], s[28:29]
 ; GCN-NEXT:    v_mov_b64_e32 v[28:29], s[20:21]
 ; GCN-NEXT:    v_mov_b64_e32 v[26:27], s[18:19]
 ; GCN-NEXT:    v_mov_b64_e32 v[24:25], s[16:17]
@@ -264,41 +262,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd(<8 x bfloat> %arg
 ; GCN-NEXT:    v_mov_b64_e32 v[20:21], s[12:13]
 ; GCN-NEXT:    v_mov_b64_e32 v[18:19], s[10:11]
 ; GCN-NEXT:    v_mov_b64_e32 v[16:17], s[8:9]
-; GCN-NEXT:    v_mov_b32_e32 v40, s20
-; GCN-NEXT:    v_mov_b32_e32 v41, s21
-; GCN-NEXT:    v_mfma_f32_32x32x16_bf16 v[0:15], v[32:35], v[36:39], v[16:31]
-; GCN-NEXT:    v_mov_b32_e32 v42, s22
-; GCN-NEXT:    v_mov_b32_e32 v43, s23
-; GCN-NEXT:    global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1
+; GCN-NEXT:    v_mov_b32_e32 v32, s20
+; GCN-NEXT:    v_mov_b32_e32 v33, s21
+; GCN-NEXT:    v_mfma_f32_32x32x16_bf16 v[0:15], v[38:41], v[42:45], v[16:31]
+; GCN-NEXT:    v_mov_b32_e32 v34, s22
+; GCN-NEXT:    v_mov_b32_e32 v35, s23
+; GCN-NEXT:    global_store_dwordx4 v36, v[32:35], s[0:1] offset:48 sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_nop 2
 ; GCN-NEXT:    v_mov_b32_e32 v16, s16
 ; GCN-NEXT:    v_mov_b32_e32 v17, s17
 ; GCN-NEXT:    v_mov_b32_e32 v18, s18
 ; GCN-NEXT:    v_mov_b32_e32 v19, s19
-; GCN-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] offset:32 sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_nop 0
 ; GCN-NEXT:    v_mov_b32_e32 v16, s12
 ; GCN-NEXT:    v_mov_b32_e32 v17, s13
 ; GCN-NEXT:    v_mov_b32_e32 v18, s14
 ; GCN-NEXT:    v_mov_b32_e32 v19, s15
-; GCN-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] offset:16 sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_nop 0
 ; GCN-NEXT:    v_mov_b32_e32 v16, s8
 ; GCN-NEXT:    v_mov_b32_e32 v17, s9
 ; GCN-NEXT:    v_mov_b32_e32 v18, s10
 ; GCN-NEXT:    v_mov_b32_e32 v19, s11
-; GCN-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v36, v[8:11], s[0:1] offset:32 sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v36, v[12:15], s[0:1] offset:48 sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v36, v[0:3], s[0:1] sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v36, v[4:7], s[0:1] offset:16 sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_endpgm
   %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0)
@@ -313,13 +311,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd__flags(<8 x bfloa
 ; GCN-NEXT:    s_load_dwordx8 s[24:31], s[4:5], 0x24
 ; GCN-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
 ; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; GCN-NEXT:    v_mov_b32_e32 v44, 0
+; GCN-NEXT:    v_mov_b32_e32 v36, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_mov_b64_e32 v[34:35], s[26:27]
-; GCN-NEXT:    v_mov_b64_e32 v[32:33], s[24:25]
-; GCN-NEXT:    v_mov_b64_e32 v[38:39], s[30:31]
+; GCN-NEXT:    v_mov_b64_e32 v[40:41], s[26:27]
+; GCN-NEXT:    v_mov_b64_e32 v[38:39], s[24:25]
+; GCN-NEXT:    v_mov_b64_e32 v[44:45], s[30:31]
 ; GCN-NEXT:    v_mov_b64_e32 v[30:31], s[22:23]
-; GCN-NEXT:    v_mov_b64_e32 v[36:37], s[28:29]
+; GCN-NEXT:    v_mov_b64_e32 v[42:43], s[28:29]
 ; GCN-NEXT:    v_mov_b64_e32 v[28:29], s[20:21]
 ; GCN-NEXT:    v_mov_b64_e32 v[26:27], s[18:19]
 ; GCN-NEXT:    v_mov_b64_e32 v[24:25], s[16:17]
@@ -327,41 +325,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd__flags(<8 x bfloa
 ; GCN-NEXT:    v_mov_b64_e32 v[20:21], s[12:13]
 ; GCN-NEXT:    v_mov_b64_e32 v[18:19], s[10:11]
 ; GCN-NEXT:    v_mov_b64_e32 v[16:17], s[8:9]
-; GCN-NEXT:    v_mov_b32_e32 v40, s20
-; GCN-NEXT:    v_mov_b32_e32 v41, s21
-; GCN-NEXT:    v_mfma_f32_32x32x16_bf16 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3
-; GCN-NEXT:    v_mov_b32_e32 v42, s22
-; GCN-NEXT:    v_mov_b32_e32 v43, s23
-; GCN-NEXT:    global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1
+; GCN-NEXT:    v_mov_b32_e32 v32, s20
+; GCN-NEXT:    v_mov_b32_e32 v33, s21
+; GCN-NEXT:    v_mfma_f32_32x32x16_bf16 v[0:15], v[38:41], v[42:45], v[16:31] cbsz:1 abid:2 blgp:3
+; GCN-NEXT:    v_mov_b32_e32 v34, s22
+; GCN-NEXT:    v_mov_b32_e32 v35, s23
+; GCN-NEXT:    global_store_dwordx4 v36, v[32:35], s[0:1] offset:48 sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_nop 2
 ; GCN-NEXT:    v_mov_b32_e32 v16, s16
 ; GCN-NEXT:    v_mov_b32_e32 v17, s17
 ; GCN-NEXT:    v_mov_b32_e32 v18, s18
 ; GCN-NEXT:    v_mov_b32_e32 v19, s19
-; GCN-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] offset:32 sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_nop 0
 ; GCN-NEXT:    v_mov_b32_e32 v16, s12
 ; GCN-NEXT:    v_mov_b32_e32 v17, s13
 ; GCN-NEXT:    v_mov_b32_e32 v18, s14
 ; GCN-NEXT:    v_mov_b32_e32 v19, s15
-; GCN-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] offset:16 sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_nop 0
 ; GCN-NEXT:    v_mov_b32_e32 v16, s8
 ; GCN-NEXT:    v_mov_b32_e32 v17, s9
 ; GCN-NEXT:    v_mov_b32_e32 v18, s10
 ; GCN-NEXT:    v_mov_b32_e32 v19, s11
-; GCN-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v36, v[8:11], s[0:1] offset:32 sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v36, v[12:15], s[0:1] offset:48 sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v36, v[0:3], s[0:1] sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1
+; GCN-NEXT:    global_store_dwordx4 v36, v[4:7], s[0:1] offset:16 sc0 sc1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_endpgm
   %result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <16 x float> %arg2, i32 1, i32 2, i32 3)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll
index 7532062..ab0000f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll
@@ -141,18 +141,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrsp
 ; SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x54
 ; SDAG-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
-; SDAG-NEXT:    v_mov_b32_e32 v12, 0
+; SDAG-NEXT:    v_mov_b32_e32 v4, 0
 ; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
-; SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
-; SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
-; SDAG-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
-; SDAG-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[8:9]
+; SDAG-NEXT:    v_mov_b64_e32 v[8:9], s[10:11]
+; SDAG-NEXT:    v_mov_b64_e32 v[10:11], s[12:13]
+; SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; SDAG-NEXT:    v_mov_b64_e32 v[12:13], s[14:15]
+; SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11]
+; SDAG-NEXT:    v_mfma_f32_16x16x32_f16 v[0:3], v[6:9], v[10:13], v[0:3]
 ; SDAG-NEXT:    s_nop 7
-; SDAG-NEXT:    global_store_dwordx4 v12, v[0:3], s[6:7]
+; SDAG-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; SDAG-NEXT:    s_endpgm
 ;
 ; GISEL-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd:
@@ -179,18 +179,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrsp
 ; HEURRC-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; HEURRC-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x54
 ; HEURRC-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
-; HEURRC-NEXT:    v_mov_b32_e32 v12, 0
+; HEURRC-NEXT:    v_mov_b32_e32 v4, 0
 ; HEURRC-NEXT:    s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
-; HEURRC-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
-; HEURRC-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
-; HEURRC-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; HEURRC-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
-; HEURRC-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; HEURRC-NEXT:    v_mov_b64_e32 v[6:7], s[8:9]
+; HEURRC-NEXT:    v_mov_b64_e32 v[8:9], s[10:11]
+; HEURRC-NEXT:    v_mov_b64_e32 v[10:11], s[12:13]
+; HEURRC-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; HEURRC-NEXT:    v_mov_b64_e32 v[12:13], s[14:15]
+; HEURRC-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
 ; HEURRC-NEXT:    s_nop 1
-; HEURRC-NEXT:    v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11]
+; HEURRC-NEXT:    v_mfma_f32_16x16x32_f16 v[0:3], v[6:9], v[10:13], v[0:3]
 ; HEURRC-NEXT:    s_nop 7
-; HEURRC-NEXT:    global_store_dwordx4 v12, v[0:3], s[6:7]
+; HEURRC-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; HEURRC-NEXT:    s_endpgm
 ;
 ; VGPRRC-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd:
@@ -198,18 +198,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrsp
 ; VGPRRC-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; VGPRRC-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x54
 ; VGPRRC-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
-; VGPRRC-NEXT:    v_mov_b32_e32 v12, 0
+; VGPRRC-NEXT:    v_mov_b32_e32 v4, 0
 ; VGPRRC-NEXT:    s_waitcnt lgkmcnt(0)
-; VGPRRC-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[6:7], s[8:9]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[8:9], s[10:11]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[10:11], s[12:13]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[12:13], s[14:15]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
 ; VGPRRC-NEXT:    s_nop 1
-; VGPRRC-NEXT:    v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11]
+; VGPRRC-NEXT:    v_mfma_f32_16x16x32_f16 v[0:3], v[6:9], v[10:13], v[0:3]
 ; VGPRRC-NEXT:    s_nop 7
-; VGPRRC-NEXT:    global_store_dwordx4 v12, v[0:3], s[6:7]
+; VGPRRC-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; VGPRRC-NEXT:    s_endpgm
 ; AGPR-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd:
 ; AGPR:       ; %bb.0:
@@ -260,18 +260,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr
 ; SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x54
 ; SDAG-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
-; SDAG-NEXT:    v_mov_b32_e32 v12, 0
+; SDAG-NEXT:    v_mov_b32_e32 v4, 0
 ; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
-; SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
-; SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
-; SDAG-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
-; SDAG-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[8:9]
+; SDAG-NEXT:    v_mov_b64_e32 v[8:9], s[10:11]
+; SDAG-NEXT:    v_mov_b64_e32 v[10:11], s[12:13]
+; SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; SDAG-NEXT:    v_mov_b64_e32 v[12:13], s[14:15]
+; SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1
+; SDAG-NEXT:    v_mfma_f32_16x16x32_f16 v[0:3], v[6:9], v[10:13], v[0:3] cbsz:3 abid:2 blgp:1
 ; SDAG-NEXT:    s_nop 7
-; SDAG-NEXT:    global_store_dwordx4 v12, v[0:3], s[6:7]
+; SDAG-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; SDAG-NEXT:    s_endpgm
 ;
 ; GISEL-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags:
@@ -298,18 +298,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr
 ; HEURRC-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; HEURRC-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x54
 ; HEURRC-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
-; HEURRC-NEXT:    v_mov_b32_e32 v12, 0
+; HEURRC-NEXT:    v_mov_b32_e32 v4, 0
 ; HEURRC-NEXT:    s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
-; HEURRC-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
-; HEURRC-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
-; HEURRC-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; HEURRC-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
-; HEURRC-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; HEURRC-NEXT:    v_mov_b64_e32 v[6:7], s[8:9]
+; HEURRC-NEXT:    v_mov_b64_e32 v[8:9], s[10:11]
+; HEURRC-NEXT:    v_mov_b64_e32 v[10:11], s[12:13]
+; HEURRC-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; HEURRC-NEXT:    v_mov_b64_e32 v[12:13], s[14:15]
+; HEURRC-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
 ; HEURRC-NEXT:    s_nop 1
-; HEURRC-NEXT:    v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1
+; HEURRC-NEXT:    v_mfma_f32_16x16x32_f16 v[0:3], v[6:9], v[10:13], v[0:3] cbsz:3 abid:2 blgp:1
 ; HEURRC-NEXT:    s_nop 7
-; HEURRC-NEXT:    global_store_dwordx4 v12, v[0:3], s[6:7]
+; HEURRC-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; HEURRC-NEXT:    s_endpgm
 ;
 ; VGPRRC-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags:
@@ -317,18 +317,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr
 ; VGPRRC-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; VGPRRC-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x54
 ; VGPRRC-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
-; VGPRRC-NEXT:    v_mov_b32_e32 v12, 0
+; VGPRRC-NEXT:    v_mov_b32_e32 v4, 0
 ; VGPRRC-NEXT:    s_waitcnt lgkmcnt(0)
-; VGPRRC-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[6:7], s[8:9]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[8:9], s[10:11]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[10:11], s[12:13]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[12:13], s[14:15]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
 ; VGPRRC-NEXT:    s_nop 1
-; VGPRRC-NEXT:    v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1
+; VGPRRC-NEXT:    v_mfma_f32_16x16x32_f16 v[0:3], v[6:9], v[10:13], v[0:3] cbsz:3 abid:2 blgp:1
 ; VGPRRC-NEXT:    s_nop 7
-; VGPRRC-NEXT:    global_store_dwordx4 v12, v[0:3], s[6:7]
+; VGPRRC-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; VGPRRC-NEXT:    s_endpgm
 ; AGPR-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags:
 ; AGPR:       ; %bb.0:
@@ -382,15 +382,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal
 ; SDAG:       ; %bb.0:
 ; SDAG-NEXT:    s_load_dwordx8 s[24:31], s[4:5], 0x24
 ; SDAG-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
-; SDAG-NEXT:    v_mov_b64_e32 v[8:9], 48
-; SDAG-NEXT:    v_mov_b64_e32 v[10:11], 32
-; SDAG-NEXT:    v_mov_b64_e32 v[12:13], 16
+; SDAG-NEXT:    v_mov_b64_e32 v[0:1], 48
+; SDAG-NEXT:    v_mov_b64_e32 v[2:3], 32
+; SDAG-NEXT:    v_mov_b64_e32 v[4:5], 16
 ; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[24:25]
-; SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[26:27]
-; SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[28:29]
+; SDAG-NEXT:    v_mov_b64_e32 v[8:9], s[24:25]
+; SDAG-NEXT:    v_mov_b64_e32 v[10:11], s[26:27]
+; SDAG-NEXT:    v_mov_b64_e32 v[12:13], s[28:29]
 ; SDAG-NEXT:    v_accvgpr_write_b32 a0, s8
-; SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[30:31]
+; SDAG-NEXT:    v_mov_b64_e32 v[14:15], s[30:31]
 ; SDAG-NEXT:    v_accvgpr_write_b32 a1, s9
 ; SDAG-NEXT:    v_accvgpr_write_b32 a2, s10
 ; SDAG-NEXT:    v_accvgpr_write_b32 a3, s11
@@ -408,40 +408,39 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal
 ; SDAG-NEXT:    v_accvgpr_write_b32 a15, s23
 ; SDAG-NEXT:    v_mov_b32_e32 v16, s16
 ; SDAG-NEXT:    v_mov_b32_e32 v17, s17
-; SDAG-NEXT:    v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15]
+; SDAG-NEXT:    v_mfma_f32_32x32x16_f16 a[16:31], v[8:11], v[12:15], a[0:15]
 ; SDAG-NEXT:    v_mov_b32_e32 v18, s18
 ; SDAG-NEXT:    v_mov_b32_e32 v19, s19
-; SDAG-NEXT:    v_mov_b32_e32 v0, s20
-; SDAG-NEXT:    v_mov_b32_e32 v1, s21
-; SDAG-NEXT:    v_mov_b32_e32 v2, s22
-; SDAG-NEXT:    v_mov_b32_e32 v3, s23
-; SDAG-NEXT:    v_mov_b64_e32 v[14:15], 0
+; SDAG-NEXT:    v_mov_b32_e32 v8, s20
+; SDAG-NEXT:    v_mov_b32_e32 v9, s21
+; SDAG-NEXT:    v_mov_b32_e32 v10, s22
+; SDAG-NEXT:    v_mov_b32_e32 v11, s23
+; SDAG-NEXT:    v_mov_b64_e32 v[6:7], 0
 ; SDAG-NEXT:    s_nop 4
-; SDAG-NEXT:    global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v[0:1], a[28:31], off sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v[2:3], a[24:27], off sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v[12:13], a[20:23], off sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v[4:5], a[20:23], off sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v[14:15], a[16:19], off sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v[6:7], a[16:19], off sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v[10:11], v[16:19], off sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v[2:3], v[16:19], off sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    s_nop 0
 ; SDAG-NEXT:    v_mov_b32_e32 v0, s8
 ; SDAG-NEXT:    v_mov_b32_e32 v1, s9
 ; SDAG-NEXT:    v_mov_b32_e32 v2, s10
 ; SDAG-NEXT:    v_mov_b32_e32 v3, s11
-; SDAG-NEXT:    global_store_dwordx4 v[14:15], v[0:3], off sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-NEXT:    s_nop 0
 ; SDAG-NEXT:    v_mov_b32_e32 v0, s12
 ; SDAG-NEXT:    v_mov_b32_e32 v1, s13
 ; SDAG-NEXT:    v_mov_b32_e32 v2, s14
 ; SDAG-NEXT:    v_mov_b32_e32 v3, s15
-; SDAG-NEXT:    global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-NEXT:    s_endpgm
 ;
@@ -508,15 +507,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal
 ; HEURRC:       ; %bb.0:
 ; HEURRC-NEXT:    s_load_dwordx8 s[24:31], s[4:5], 0x24
 ; HEURRC-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
-; HEURRC-NEXT:    v_mov_b64_e32 v[8:9], 48
-; HEURRC-NEXT:    v_mov_b64_e32 v[10:11], 32
-; HEURRC-NEXT:    v_mov_b64_e32 v[12:13], 16
+; HEURRC-NEXT:    v_mov_b64_e32 v[0:1], 48
+; HEURRC-NEXT:    v_mov_b64_e32 v[2:3], 32
+; HEURRC-NEXT:    v_mov_b64_e32 v[4:5], 16
 ; HEURRC-NEXT:    s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT:    v_mov_b64_e32 v[0:1], s[24:25]
-; HEURRC-NEXT:    v_mov_b64_e32 v[2:3], s[26:27]
-; HEURRC-NEXT:    v_mov_b64_e32 v[4:5], s[28:29]
+; HEURRC-NEXT:    v_mov_b64_e32 v[8:9], s[24:25]
+; HEURRC-NEXT:    v_mov_b64_e32 v[10:11], s[26:27]
+; HEURRC-NEXT:    v_mov_b64_e32 v[12:13], s[28:29]
 ; HEURRC-NEXT:    v_accvgpr_write_b32 a0, s8
-; HEURRC-NEXT:    v_mov_b64_e32 v[6:7], s[30:31]
+; HEURRC-NEXT:    v_mov_b64_e32 v[14:15], s[30:31]
 ; HEURRC-NEXT:    v_accvgpr_write_b32 a1, s9
 ; HEURRC-NEXT:    v_accvgpr_write_b32 a2, s10
 ; HEURRC-NEXT:    v_accvgpr_write_b32 a3, s11
@@ -534,40 +533,39 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal
 ; HEURRC-NEXT:    v_accvgpr_write_b32 a15, s23
 ; HEURRC-NEXT:    v_mov_b32_e32 v16, s16
 ; HEURRC-NEXT:    v_mov_b32_e32 v17, s17
-; HEURRC-NEXT:    v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15]
+; HEURRC-NEXT:    v_mfma_f32_32x32x16_f16 a[16:31], v[8:11], v[12:15], a[0:15]
 ; HEURRC-NEXT:    v_mov_b32_e32 v18, s18
 ; HEURRC-NEXT:    v_mov_b32_e32 v19, s19
-; HEURRC-NEXT:    v_mov_b32_e32 v0, s20
-; HEURRC-NEXT:    v_mov_b32_e32 v1, s21
-; HEURRC-NEXT:    v_mov_b32_e32 v2, s22
-; HEURRC-NEXT:    v_mov_b32_e32 v3, s23
-; HEURRC-NEXT:    v_mov_b64_e32 v[14:15], 0
+; HEURRC-NEXT:    v_mov_b32_e32 v8, s20
+; HEURRC-NEXT:    v_mov_b32_e32 v9, s21
+; HEURRC-NEXT:    v_mov_b32_e32 v10, s22
+; HEURRC-NEXT:    v_mov_b32_e32 v11, s23
+; HEURRC-NEXT:    v_mov_b64_e32 v[6:7], 0
 ; HEURRC-NEXT:    s_nop 4
-; HEURRC-NEXT:    global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v[0:1], a[28:31], off sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v[2:3], a[24:27], off sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v[12:13], a[20:23], off sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v[4:5], a[20:23], off sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v[14:15], a[16:19], off sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v[6:7], a[16:19], off sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v[10:11], v[16:19], off sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v[2:3], v[16:19], off sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    s_nop 0
 ; HEURRC-NEXT:    v_mov_b32_e32 v0, s8
 ; HEURRC-NEXT:    v_mov_b32_e32 v1, s9
 ; HEURRC-NEXT:    v_mov_b32_e32 v2, s10
 ; HEURRC-NEXT:    v_mov_b32_e32 v3, s11
-; HEURRC-NEXT:    global_store_dwordx4 v[14:15], v[0:3], off sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
 ; HEURRC-NEXT:    s_nop 0
 ; HEURRC-NEXT:    v_mov_b32_e32 v0, s12
 ; HEURRC-NEXT:    v_mov_b32_e32 v1, s13
 ; HEURRC-NEXT:    v_mov_b32_e32 v2, s14
 ; HEURRC-NEXT:    v_mov_b32_e32 v3, s15
-; HEURRC-NEXT:    global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
 ; HEURRC-NEXT:    s_endpgm
 ;
@@ -575,15 +573,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal
 ; VGPRRC:       ; %bb.0:
 ; VGPRRC-NEXT:    s_load_dwordx8 s[24:31], s[4:5], 0x24
 ; VGPRRC-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
-; VGPRRC-NEXT:    v_mov_b64_e32 v[40:41], 48
-; VGPRRC-NEXT:    v_mov_b64_e32 v[42:43], 32
-; VGPRRC-NEXT:    v_mov_b64_e32 v[44:45], 16
+; VGPRRC-NEXT:    v_mov_b64_e32 v[32:33], 48
+; VGPRRC-NEXT:    v_mov_b64_e32 v[34:35], 32
+; VGPRRC-NEXT:    v_mov_b64_e32 v[36:37], 16
 ; VGPRRC-NEXT:    s_waitcnt lgkmcnt(0)
-; VGPRRC-NEXT:    v_mov_b64_e32 v[34:35], s[26:27]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[32:33], s[24:25]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[38:39], s[30:31]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[42:43], s[26:27]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[40:41], s[24:25]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[46:47], s[30:31]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[36:37], s[28:29]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[44:45], s[28:29]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
@@ -593,40 +591,40 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[14:15], s[22:23]
 ; VGPRRC-NEXT:    v_mov_b32_e32 v48, s16
 ; VGPRRC-NEXT:    v_mov_b32_e32 v49, s17
-; VGPRRC-NEXT:    v_mfma_f32_32x32x16_f16 v[16:31], v[32:35], v[36:39], v[0:15]
+; VGPRRC-NEXT:    v_mfma_f32_32x32x16_f16 v[16:31], v[40:43], v[44:47], v[0:15]
 ; VGPRRC-NEXT:    v_mov_b32_e32 v50, s18
 ; VGPRRC-NEXT:    v_mov_b32_e32 v51, s19
-; VGPRRC-NEXT:    v_mov_b64_e32 v[46:47], 0
+; VGPRRC-NEXT:    v_mov_b64_e32 v[38:39], 0
 ; VGPRRC-NEXT:    s_nop 8
-; VGPRRC-NEXT:    global_store_dwordx4 v[40:41], v[28:31], off sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v[32:33], v[28:31], off sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
-; VGPRRC-NEXT:    global_store_dwordx4 v[42:43], v[24:27], off sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v[34:35], v[24:27], off sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
-; VGPRRC-NEXT:    global_store_dwordx4 v[44:45], v[20:23], off sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v[36:37], v[20:23], off sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
-; VGPRRC-NEXT:    global_store_dwordx4 v[46:47], v[16:19], off sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v[38:39], v[16:19], off sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
 ; VGPRRC-NEXT:    v_mov_b32_e32 v0, s20
 ; VGPRRC-NEXT:    v_mov_b32_e32 v1, s21
 ; VGPRRC-NEXT:    v_mov_b32_e32 v2, s22
 ; VGPRRC-NEXT:    v_mov_b32_e32 v3, s23
-; VGPRRC-NEXT:    global_store_dwordx4 v[42:43], v[48:51], off sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v[34:35], v[48:51], off sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
-; VGPRRC-NEXT:    global_store_dwordx4 v[40:41], v[0:3], off sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v[32:33], v[0:3], off sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
 ; VGPRRC-NEXT:    s_nop 0
 ; VGPRRC-NEXT:    v_mov_b32_e32 v0, s8
 ; VGPRRC-NEXT:    v_mov_b32_e32 v1, s9
 ; VGPRRC-NEXT:    v_mov_b32_e32 v2, s10
 ; VGPRRC-NEXT:    v_mov_b32_e32 v3, s11
-; VGPRRC-NEXT:    global_store_dwordx4 v[46:47], v[0:3], off sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v[38:39], v[0:3], off sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
 ; VGPRRC-NEXT:    s_nop 0
 ; VGPRRC-NEXT:    v_mov_b32_e32 v0, s12
 ; VGPRRC-NEXT:    v_mov_b32_e32 v1, s13
 ; VGPRRC-NEXT:    v_mov_b32_e32 v2, s14
 ; VGPRRC-NEXT:    v_mov_b32_e32 v3, s15
-; VGPRRC-NEXT:    global_store_dwordx4 v[44:45], v[0:3], off sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v[36:37], v[0:3], off sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
 ; VGPRRC-NEXT:    s_endpgm
 ; AGPR-LABEL: test_mfma_f32_32x32x16_f16:
@@ -765,15 +763,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, <
 ; SDAG:       ; %bb.0:
 ; SDAG-NEXT:    s_load_dwordx8 s[24:31], s[4:5], 0x24
 ; SDAG-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
-; SDAG-NEXT:    v_mov_b64_e32 v[8:9], 48
-; SDAG-NEXT:    v_mov_b64_e32 v[10:11], 32
-; SDAG-NEXT:    v_mov_b64_e32 v[12:13], 16
+; SDAG-NEXT:    v_mov_b64_e32 v[0:1], 48
+; SDAG-NEXT:    v_mov_b64_e32 v[2:3], 32
+; SDAG-NEXT:    v_mov_b64_e32 v[4:5], 16
 ; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[24:25]
-; SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[26:27]
-; SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[28:29]
+; SDAG-NEXT:    v_mov_b64_e32 v[8:9], s[24:25]
+; SDAG-NEXT:    v_mov_b64_e32 v[10:11], s[26:27]
+; SDAG-NEXT:    v_mov_b64_e32 v[12:13], s[28:29]
 ; SDAG-NEXT:    v_accvgpr_write_b32 a0, s8
-; SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[30:31]
+; SDAG-NEXT:    v_mov_b64_e32 v[14:15], s[30:31]
 ; SDAG-NEXT:    v_accvgpr_write_b32 a1, s9
 ; SDAG-NEXT:    v_accvgpr_write_b32 a2, s10
 ; SDAG-NEXT:    v_accvgpr_write_b32 a3, s11
@@ -791,40 +789,39 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, <
 ; SDAG-NEXT:    v_accvgpr_write_b32 a15, s23
 ; SDAG-NEXT:    v_mov_b32_e32 v16, s16
 ; SDAG-NEXT:    v_mov_b32_e32 v17, s17
-; SDAG-NEXT:    v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:2 abid:3 blgp:1
+; SDAG-NEXT:    v_mfma_f32_32x32x16_f16 a[16:31], v[8:11], v[12:15], a[0:15] cbsz:2 abid:3 blgp:1
 ; SDAG-NEXT:    v_mov_b32_e32 v18, s18
 ; SDAG-NEXT:    v_mov_b32_e32 v19, s19
-; SDAG-NEXT:    v_mov_b32_e32 v0, s20
-; SDAG-NEXT:    v_mov_b32_e32 v1, s21
-; SDAG-NEXT:    v_mov_b32_e32 v2, s22
-; SDAG-NEXT:    v_mov_b32_e32 v3, s23
-; SDAG-NEXT:    v_mov_b64_e32 v[14:15], 0
+; SDAG-NEXT:    v_mov_b32_e32 v8, s20
+; SDAG-NEXT:    v_mov_b32_e32 v9, s21
+; SDAG-NEXT:    v_mov_b32_e32 v10, s22
+; SDAG-NEXT:    v_mov_b32_e32 v11, s23
+; SDAG-NEXT:    v_mov_b64_e32 v[6:7], 0
 ; SDAG-NEXT:    s_nop 4
-; SDAG-NEXT:    global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v[0:1], a[28:31], off sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v[2:3], a[24:27], off sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v[12:13], a[20:23], off sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v[4:5], a[20:23], off sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v[14:15], a[16:19], off sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v[6:7], a[16:19], off sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v[10:11], v[16:19], off sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v[2:3], v[16:19], off sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    s_nop 0
 ; SDAG-NEXT:    v_mov_b32_e32 v0, s8
 ; SDAG-NEXT:    v_mov_b32_e32 v1, s9
 ; SDAG-NEXT:    v_mov_b32_e32 v2, s10
 ; SDAG-NEXT:    v_mov_b32_e32 v3, s11
-; SDAG-NEXT:    global_store_dwordx4 v[14:15], v[0:3], off sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-NEXT:    s_nop 0
 ; SDAG-NEXT:    v_mov_b32_e32 v0, s12
 ; SDAG-NEXT:    v_mov_b32_e32 v1, s13
 ; SDAG-NEXT:    v_mov_b32_e32 v2, s14
 ; SDAG-NEXT:    v_mov_b32_e32 v3, s15
-; SDAG-NEXT:    global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-NEXT:    s_endpgm
 ;
@@ -891,15 +888,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, <
 ; HEURRC:       ; %bb.0:
 ; HEURRC-NEXT:    s_load_dwordx8 s[24:31], s[4:5], 0x24
 ; HEURRC-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
-; HEURRC-NEXT:    v_mov_b64_e32 v[8:9], 48
-; HEURRC-NEXT:    v_mov_b64_e32 v[10:11], 32
-; HEURRC-NEXT:    v_mov_b64_e32 v[12:13], 16
+; HEURRC-NEXT:    v_mov_b64_e32 v[0:1], 48
+; HEURRC-NEXT:    v_mov_b64_e32 v[2:3], 32
+; HEURRC-NEXT:    v_mov_b64_e32 v[4:5], 16
 ; HEURRC-NEXT:    s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT:    v_mov_b64_e32 v[0:1], s[24:25]
-; HEURRC-NEXT:    v_mov_b64_e32 v[2:3], s[26:27]
-; HEURRC-NEXT:    v_mov_b64_e32 v[4:5], s[28:29]
+; HEURRC-NEXT:    v_mov_b64_e32 v[8:9], s[24:25]
+; HEURRC-NEXT:    v_mov_b64_e32 v[10:11], s[26:27]
+; HEURRC-NEXT:    v_mov_b64_e32 v[12:13], s[28:29]
 ; HEURRC-NEXT:    v_accvgpr_write_b32 a0, s8
-; HEURRC-NEXT:    v_mov_b64_e32 v[6:7], s[30:31]
+; HEURRC-NEXT:    v_mov_b64_e32 v[14:15], s[30:31]
 ; HEURRC-NEXT:    v_accvgpr_write_b32 a1, s9
 ; HEURRC-NEXT:    v_accvgpr_write_b32 a2, s10
 ; HEURRC-NEXT:    v_accvgpr_write_b32 a3, s11
@@ -917,40 +914,39 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, <
 ; HEURRC-NEXT:    v_accvgpr_write_b32 a15, s23
 ; HEURRC-NEXT:    v_mov_b32_e32 v16, s16
 ; HEURRC-NEXT:    v_mov_b32_e32 v17, s17
-; HEURRC-NEXT:    v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:2 abid:3 blgp:1
+; HEURRC-NEXT:    v_mfma_f32_32x32x16_f16 a[16:31], v[8:11], v[12:15], a[0:15] cbsz:2 abid:3 blgp:1
 ; HEURRC-NEXT:    v_mov_b32_e32 v18, s18
 ; HEURRC-NEXT:    v_mov_b32_e32 v19, s19
-; HEURRC-NEXT:    v_mov_b32_e32 v0, s20
-; HEURRC-NEXT:    v_mov_b32_e32 v1, s21
-; HEURRC-NEXT:    v_mov_b32_e32 v2, s22
-; HEURRC-NEXT:    v_mov_b32_e32 v3, s23
-; HEURRC-NEXT:    v_mov_b64_e32 v[14:15], 0
+; HEURRC-NEXT:    v_mov_b32_e32 v8, s20
+; HEURRC-NEXT:    v_mov_b32_e32 v9, s21
+; HEURRC-NEXT:    v_mov_b32_e32 v10, s22
+; HEURRC-NEXT:    v_mov_b32_e32 v11, s23
+; HEURRC-NEXT:    v_mov_b64_e32 v[6:7], 0
 ; HEURRC-NEXT:    s_nop 4
-; HEURRC-NEXT:    global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v[0:1], a[28:31], off sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v[2:3], a[24:27], off sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v[12:13], a[20:23], off sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v[4:5], a[20:23], off sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v[14:15], a[16:19], off sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v[6:7], a[16:19], off sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v[10:11], v[16:19], off sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v[2:3], v[16:19], off sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v[8:9], v[0:3], off sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v[0:1], v[8:11], off sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    s_nop 0
 ; HEURRC-NEXT:    v_mov_b32_e32 v0, s8
 ; HEURRC-NEXT:    v_mov_b32_e32 v1, s9
 ; HEURRC-NEXT:    v_mov_b32_e32 v2, s10
 ; HEURRC-NEXT:    v_mov_b32_e32 v3, s11
-; HEURRC-NEXT:    global_store_dwordx4 v[14:15], v[0:3], off sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v[6:7], v[0:3], off sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
 ; HEURRC-NEXT:    s_nop 0
 ; HEURRC-NEXT:    v_mov_b32_e32 v0, s12
 ; HEURRC-NEXT:    v_mov_b32_e32 v1, s13
 ; HEURRC-NEXT:    v_mov_b32_e32 v2, s14
 ; HEURRC-NEXT:    v_mov_b32_e32 v3, s15
-; HEURRC-NEXT:    global_store_dwordx4 v[12:13], v[0:3], off sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v[4:5], v[0:3], off sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
 ; HEURRC-NEXT:    s_endpgm
 ;
@@ -958,15 +954,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, <
 ; VGPRRC:       ; %bb.0:
 ; VGPRRC-NEXT:    s_load_dwordx8 s[24:31], s[4:5], 0x24
 ; VGPRRC-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
-; VGPRRC-NEXT:    v_mov_b64_e32 v[40:41], 48
-; VGPRRC-NEXT:    v_mov_b64_e32 v[42:43], 32
-; VGPRRC-NEXT:    v_mov_b64_e32 v[44:45], 16
+; VGPRRC-NEXT:    v_mov_b64_e32 v[32:33], 48
+; VGPRRC-NEXT:    v_mov_b64_e32 v[34:35], 32
+; VGPRRC-NEXT:    v_mov_b64_e32 v[36:37], 16
 ; VGPRRC-NEXT:    s_waitcnt lgkmcnt(0)
-; VGPRRC-NEXT:    v_mov_b64_e32 v[34:35], s[26:27]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[32:33], s[24:25]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[38:39], s[30:31]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[42:43], s[26:27]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[40:41], s[24:25]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[46:47], s[30:31]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[36:37], s[28:29]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[44:45], s[28:29]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
@@ -976,40 +972,40 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, <
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[14:15], s[22:23]
 ; VGPRRC-NEXT:    v_mov_b32_e32 v48, s16
 ; VGPRRC-NEXT:    v_mov_b32_e32 v49, s17
-; VGPRRC-NEXT:    v_mfma_f32_32x32x16_f16 v[16:31], v[32:35], v[36:39], v[0:15] cbsz:2 abid:3 blgp:1
+; VGPRRC-NEXT:    v_mfma_f32_32x32x16_f16 v[16:31], v[40:43], v[44:47], v[0:15] cbsz:2 abid:3 blgp:1
 ; VGPRRC-NEXT:    v_mov_b32_e32 v50, s18
 ; VGPRRC-NEXT:    v_mov_b32_e32 v51, s19
-; VGPRRC-NEXT:    v_mov_b64_e32 v[46:47], 0
+; VGPRRC-NEXT:    v_mov_b64_e32 v[38:39], 0
 ; VGPRRC-NEXT:    s_nop 8
-; VGPRRC-NEXT:    global_store_dwordx4 v[40:41], v[28:31], off sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v[32:33], v[28:31], off sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
-; VGPRRC-NEXT:    global_store_dwordx4 v[42:43], v[24:27], off sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v[34:35], v[24:27], off sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
-; VGPRRC-NEXT:    global_store_dwordx4 v[44:45], v[20:23], off sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v[36:37], v[20:23], off sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
-; VGPRRC-NEXT:    global_store_dwordx4 v[46:47], v[16:19], off sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v[38:39], v[16:19], off sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
 ; VGPRRC-NEXT:    v_mov_b32_e32 v0, s20
 ; VGPRRC-NEXT:    v_mov_b32_e32 v1, s21
 ; VGPRRC-NEXT:    v_mov_b32_e32 v2, s22
 ; VGPRRC-NEXT:    v_mov_b32_e32 v3, s23
-; VGPRRC-NEXT:    global_store_dwordx4 v[42:43], v[48:51], off sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v[34:35], v[48:51], off sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
-; VGPRRC-NEXT:    global_store_dwordx4 v[40:41], v[0:3], off sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v[32:33], v[0:3], off sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
 ; VGPRRC-NEXT:    s_nop 0
 ; VGPRRC-NEXT:    v_mov_b32_e32 v0, s8
 ; VGPRRC-NEXT:    v_mov_b32_e32 v1, s9
 ; VGPRRC-NEXT:    v_mov_b32_e32 v2, s10
 ; VGPRRC-NEXT:    v_mov_b32_e32 v3, s11
-; VGPRRC-NEXT:    global_store_dwordx4 v[46:47], v[0:3], off sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v[38:39], v[0:3], off sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
 ; VGPRRC-NEXT:    s_nop 0
 ; VGPRRC-NEXT:    v_mov_b32_e32 v0, s12
 ; VGPRRC-NEXT:    v_mov_b32_e32 v1, s13
 ; VGPRRC-NEXT:    v_mov_b32_e32 v2, s14
 ; VGPRRC-NEXT:    v_mov_b32_e32 v3, s15
-; VGPRRC-NEXT:    global_store_dwordx4 v[44:45], v[0:3], off sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v[36:37], v[0:3], off sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
 ; VGPRRC-NEXT:    s_endpgm
 ; AGPR-LABEL: test_mfma_f32_32x32x16_f16__flags:
@@ -1489,13 +1485,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0,
 ; SDAG-NEXT:    s_load_dwordx8 s[24:31], s[4:5], 0x24
 ; SDAG-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
 ; SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; SDAG-NEXT:    v_mov_b32_e32 v44, 0
+; SDAG-NEXT:    v_mov_b32_e32 v36, 0
 ; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b64_e32 v[34:35], s[26:27]
-; SDAG-NEXT:    v_mov_b64_e32 v[32:33], s[24:25]
-; SDAG-NEXT:    v_mov_b64_e32 v[38:39], s[30:31]
+; SDAG-NEXT:    v_mov_b64_e32 v[40:41], s[26:27]
+; SDAG-NEXT:    v_mov_b64_e32 v[38:39], s[24:25]
+; SDAG-NEXT:    v_mov_b64_e32 v[44:45], s[30:31]
 ; SDAG-NEXT:    v_mov_b64_e32 v[30:31], s[22:23]
-; SDAG-NEXT:    v_mov_b64_e32 v[36:37], s[28:29]
+; SDAG-NEXT:    v_mov_b64_e32 v[42:43], s[28:29]
 ; SDAG-NEXT:    v_mov_b64_e32 v[28:29], s[20:21]
 ; SDAG-NEXT:    v_mov_b64_e32 v[26:27], s[18:19]
 ; SDAG-NEXT:    v_mov_b64_e32 v[24:25], s[16:17]
@@ -1503,41 +1499,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0,
 ; SDAG-NEXT:    v_mov_b64_e32 v[20:21], s[12:13]
 ; SDAG-NEXT:    v_mov_b64_e32 v[18:19], s[10:11]
 ; SDAG-NEXT:    v_mov_b64_e32 v[16:17], s[8:9]
-; SDAG-NEXT:    v_mov_b32_e32 v40, s20
-; SDAG-NEXT:    v_mov_b32_e32 v41, s21
-; SDAG-NEXT:    v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31]
-; SDAG-NEXT:    v_mov_b32_e32 v42, s22
-; SDAG-NEXT:    v_mov_b32_e32 v43, s23
-; SDAG-NEXT:    global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1
+; SDAG-NEXT:    v_mov_b32_e32 v32, s20
+; SDAG-NEXT:    v_mov_b32_e32 v33, s21
+; SDAG-NEXT:    v_mfma_f32_32x32x16_f16 v[0:15], v[38:41], v[42:45], v[16:31]
+; SDAG-NEXT:    v_mov_b32_e32 v34, s22
+; SDAG-NEXT:    v_mov_b32_e32 v35, s23
+; SDAG-NEXT:    global_store_dwordx4 v36, v[32:35], s[0:1] offset:48 sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-NEXT:    s_nop 2
 ; SDAG-NEXT:    v_mov_b32_e32 v16, s16
 ; SDAG-NEXT:    v_mov_b32_e32 v17, s17
 ; SDAG-NEXT:    v_mov_b32_e32 v18, s18
 ; SDAG-NEXT:    v_mov_b32_e32 v19, s19
-; SDAG-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] offset:32 sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-NEXT:    s_nop 0
 ; SDAG-NEXT:    v_mov_b32_e32 v16, s12
 ; SDAG-NEXT:    v_mov_b32_e32 v17, s13
 ; SDAG-NEXT:    v_mov_b32_e32 v18, s14
 ; SDAG-NEXT:    v_mov_b32_e32 v19, s15
-; SDAG-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] offset:16 sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-NEXT:    s_nop 0
 ; SDAG-NEXT:    v_mov_b32_e32 v16, s8
 ; SDAG-NEXT:    v_mov_b32_e32 v17, s9
 ; SDAG-NEXT:    v_mov_b32_e32 v18, s10
 ; SDAG-NEXT:    v_mov_b32_e32 v19, s11
-; SDAG-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v36, v[8:11], s[0:1] offset:32 sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v36, v[12:15], s[0:1] offset:48 sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v36, v[0:3], s[0:1] sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v36, v[4:7], s[0:1] offset:16 sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-NEXT:    s_endpgm
 ;
@@ -1592,13 +1588,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0,
 ; HEURRC-NEXT:    s_load_dwordx8 s[24:31], s[4:5], 0x24
 ; HEURRC-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
 ; HEURRC-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; HEURRC-NEXT:    v_mov_b32_e32 v44, 0
+; HEURRC-NEXT:    v_mov_b32_e32 v36, 0
 ; HEURRC-NEXT:    s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT:    v_mov_b64_e32 v[34:35], s[26:27]
-; HEURRC-NEXT:    v_mov_b64_e32 v[32:33], s[24:25]
-; HEURRC-NEXT:    v_mov_b64_e32 v[38:39], s[30:31]
+; HEURRC-NEXT:    v_mov_b64_e32 v[40:41], s[26:27]
+; HEURRC-NEXT:    v_mov_b64_e32 v[38:39], s[24:25]
+; HEURRC-NEXT:    v_mov_b64_e32 v[44:45], s[30:31]
 ; HEURRC-NEXT:    v_mov_b64_e32 v[30:31], s[22:23]
-; HEURRC-NEXT:    v_mov_b64_e32 v[36:37], s[28:29]
+; HEURRC-NEXT:    v_mov_b64_e32 v[42:43], s[28:29]
 ; HEURRC-NEXT:    v_mov_b64_e32 v[28:29], s[20:21]
 ; HEURRC-NEXT:    v_mov_b64_e32 v[26:27], s[18:19]
 ; HEURRC-NEXT:    v_mov_b64_e32 v[24:25], s[16:17]
@@ -1606,41 +1602,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0,
 ; HEURRC-NEXT:    v_mov_b64_e32 v[20:21], s[12:13]
 ; HEURRC-NEXT:    v_mov_b64_e32 v[18:19], s[10:11]
 ; HEURRC-NEXT:    v_mov_b64_e32 v[16:17], s[8:9]
-; HEURRC-NEXT:    v_mov_b32_e32 v40, s20
-; HEURRC-NEXT:    v_mov_b32_e32 v41, s21
-; HEURRC-NEXT:    v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31]
-; HEURRC-NEXT:    v_mov_b32_e32 v42, s22
-; HEURRC-NEXT:    v_mov_b32_e32 v43, s23
-; HEURRC-NEXT:    global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1
+; HEURRC-NEXT:    v_mov_b32_e32 v32, s20
+; HEURRC-NEXT:    v_mov_b32_e32 v33, s21
+; HEURRC-NEXT:    v_mfma_f32_32x32x16_f16 v[0:15], v[38:41], v[42:45], v[16:31]
+; HEURRC-NEXT:    v_mov_b32_e32 v34, s22
+; HEURRC-NEXT:    v_mov_b32_e32 v35, s23
+; HEURRC-NEXT:    global_store_dwordx4 v36, v[32:35], s[0:1] offset:48 sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
 ; HEURRC-NEXT:    s_nop 2
 ; HEURRC-NEXT:    v_mov_b32_e32 v16, s16
 ; HEURRC-NEXT:    v_mov_b32_e32 v17, s17
 ; HEURRC-NEXT:    v_mov_b32_e32 v18, s18
 ; HEURRC-NEXT:    v_mov_b32_e32 v19, s19
-; HEURRC-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] offset:32 sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
 ; HEURRC-NEXT:    s_nop 0
 ; HEURRC-NEXT:    v_mov_b32_e32 v16, s12
 ; HEURRC-NEXT:    v_mov_b32_e32 v17, s13
 ; HEURRC-NEXT:    v_mov_b32_e32 v18, s14
 ; HEURRC-NEXT:    v_mov_b32_e32 v19, s15
-; HEURRC-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] offset:16 sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
 ; HEURRC-NEXT:    s_nop 0
 ; HEURRC-NEXT:    v_mov_b32_e32 v16, s8
 ; HEURRC-NEXT:    v_mov_b32_e32 v17, s9
 ; HEURRC-NEXT:    v_mov_b32_e32 v18, s10
 ; HEURRC-NEXT:    v_mov_b32_e32 v19, s11
-; HEURRC-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v36, v[8:11], s[0:1] offset:32 sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v36, v[12:15], s[0:1] offset:48 sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v36, v[0:3], s[0:1] sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v36, v[4:7], s[0:1] offset:16 sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
 ; HEURRC-NEXT:    s_endpgm
 ;
@@ -1649,13 +1645,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0,
 ; VGPRRC-NEXT:    s_load_dwordx8 s[24:31], s[4:5], 0x24
 ; VGPRRC-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
 ; VGPRRC-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; VGPRRC-NEXT:    v_mov_b32_e32 v44, 0
+; VGPRRC-NEXT:    v_mov_b32_e32 v36, 0
 ; VGPRRC-NEXT:    s_waitcnt lgkmcnt(0)
-; VGPRRC-NEXT:    v_mov_b64_e32 v[34:35], s[26:27]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[32:33], s[24:25]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[38:39], s[30:31]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[40:41], s[26:27]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[38:39], s[24:25]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[44:45], s[30:31]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[30:31], s[22:23]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[36:37], s[28:29]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[42:43], s[28:29]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[28:29], s[20:21]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[26:27], s[18:19]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[24:25], s[16:17]
@@ -1663,41 +1659,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0,
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[20:21], s[12:13]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[18:19], s[10:11]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[16:17], s[8:9]
-; VGPRRC-NEXT:    v_mov_b32_e32 v40, s20
-; VGPRRC-NEXT:    v_mov_b32_e32 v41, s21
-; VGPRRC-NEXT:    v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31]
-; VGPRRC-NEXT:    v_mov_b32_e32 v42, s22
-; VGPRRC-NEXT:    v_mov_b32_e32 v43, s23
-; VGPRRC-NEXT:    global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1
+; VGPRRC-NEXT:    v_mov_b32_e32 v32, s20
+; VGPRRC-NEXT:    v_mov_b32_e32 v33, s21
+; VGPRRC-NEXT:    v_mfma_f32_32x32x16_f16 v[0:15], v[38:41], v[42:45], v[16:31]
+; VGPRRC-NEXT:    v_mov_b32_e32 v34, s22
+; VGPRRC-NEXT:    v_mov_b32_e32 v35, s23
+; VGPRRC-NEXT:    global_store_dwordx4 v36, v[32:35], s[0:1] offset:48 sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
 ; VGPRRC-NEXT:    s_nop 2
 ; VGPRRC-NEXT:    v_mov_b32_e32 v16, s16
 ; VGPRRC-NEXT:    v_mov_b32_e32 v17, s17
 ; VGPRRC-NEXT:    v_mov_b32_e32 v18, s18
 ; VGPRRC-NEXT:    v_mov_b32_e32 v19, s19
-; VGPRRC-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] offset:32 sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
 ; VGPRRC-NEXT:    s_nop 0
 ; VGPRRC-NEXT:    v_mov_b32_e32 v16, s12
 ; VGPRRC-NEXT:    v_mov_b32_e32 v17, s13
 ; VGPRRC-NEXT:    v_mov_b32_e32 v18, s14
 ; VGPRRC-NEXT:    v_mov_b32_e32 v19, s15
-; VGPRRC-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] offset:16 sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
 ; VGPRRC-NEXT:    s_nop 0
 ; VGPRRC-NEXT:    v_mov_b32_e32 v16, s8
 ; VGPRRC-NEXT:    v_mov_b32_e32 v17, s9
 ; VGPRRC-NEXT:    v_mov_b32_e32 v18, s10
 ; VGPRRC-NEXT:    v_mov_b32_e32 v19, s11
-; VGPRRC-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
-; VGPRRC-NEXT:    global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v36, v[8:11], s[0:1] offset:32 sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
-; VGPRRC-NEXT:    global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v36, v[12:15], s[0:1] offset:48 sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
-; VGPRRC-NEXT:    global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v36, v[0:3], s[0:1] sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
-; VGPRRC-NEXT:    global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v36, v[4:7], s[0:1] offset:16 sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
 ; VGPRRC-NEXT:    s_endpgm
 ; AGPR-LABEL: test_mfma_f32_32x32x16_f16__vgprcd:
@@ -1831,13 +1827,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half>
 ; SDAG-NEXT:    s_load_dwordx8 s[24:31], s[4:5], 0x24
 ; SDAG-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
 ; SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; SDAG-NEXT:    v_mov_b32_e32 v44, 0
+; SDAG-NEXT:    v_mov_b32_e32 v36, 0
 ; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b64_e32 v[34:35], s[26:27]
-; SDAG-NEXT:    v_mov_b64_e32 v[32:33], s[24:25]
-; SDAG-NEXT:    v_mov_b64_e32 v[38:39], s[30:31]
+; SDAG-NEXT:    v_mov_b64_e32 v[40:41], s[26:27]
+; SDAG-NEXT:    v_mov_b64_e32 v[38:39], s[24:25]
+; SDAG-NEXT:    v_mov_b64_e32 v[44:45], s[30:31]
 ; SDAG-NEXT:    v_mov_b64_e32 v[30:31], s[22:23]
-; SDAG-NEXT:    v_mov_b64_e32 v[36:37], s[28:29]
+; SDAG-NEXT:    v_mov_b64_e32 v[42:43], s[28:29]
 ; SDAG-NEXT:    v_mov_b64_e32 v[28:29], s[20:21]
 ; SDAG-NEXT:    v_mov_b64_e32 v[26:27], s[18:19]
 ; SDAG-NEXT:    v_mov_b64_e32 v[24:25], s[16:17]
@@ -1845,41 +1841,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half>
 ; SDAG-NEXT:    v_mov_b64_e32 v[20:21], s[12:13]
 ; SDAG-NEXT:    v_mov_b64_e32 v[18:19], s[10:11]
 ; SDAG-NEXT:    v_mov_b64_e32 v[16:17], s[8:9]
-; SDAG-NEXT:    v_mov_b32_e32 v40, s20
-; SDAG-NEXT:    v_mov_b32_e32 v41, s21
-; SDAG-NEXT:    v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3
-; SDAG-NEXT:    v_mov_b32_e32 v42, s22
-; SDAG-NEXT:    v_mov_b32_e32 v43, s23
-; SDAG-NEXT:    global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1
+; SDAG-NEXT:    v_mov_b32_e32 v32, s20
+; SDAG-NEXT:    v_mov_b32_e32 v33, s21
+; SDAG-NEXT:    v_mfma_f32_32x32x16_f16 v[0:15], v[38:41], v[42:45], v[16:31] cbsz:1 abid:2 blgp:3
+; SDAG-NEXT:    v_mov_b32_e32 v34, s22
+; SDAG-NEXT:    v_mov_b32_e32 v35, s23
+; SDAG-NEXT:    global_store_dwordx4 v36, v[32:35], s[0:1] offset:48 sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-NEXT:    s_nop 2
 ; SDAG-NEXT:    v_mov_b32_e32 v16, s16
 ; SDAG-NEXT:    v_mov_b32_e32 v17, s17
 ; SDAG-NEXT:    v_mov_b32_e32 v18, s18
 ; SDAG-NEXT:    v_mov_b32_e32 v19, s19
-; SDAG-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] offset:32 sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-NEXT:    s_nop 0
 ; SDAG-NEXT:    v_mov_b32_e32 v16, s12
 ; SDAG-NEXT:    v_mov_b32_e32 v17, s13
 ; SDAG-NEXT:    v_mov_b32_e32 v18, s14
 ; SDAG-NEXT:    v_mov_b32_e32 v19, s15
-; SDAG-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] offset:16 sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-NEXT:    s_nop 0
 ; SDAG-NEXT:    v_mov_b32_e32 v16, s8
 ; SDAG-NEXT:    v_mov_b32_e32 v17, s9
 ; SDAG-NEXT:    v_mov_b32_e32 v18, s10
 ; SDAG-NEXT:    v_mov_b32_e32 v19, s11
-; SDAG-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v36, v[8:11], s[0:1] offset:32 sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v36, v[12:15], s[0:1] offset:48 sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v36, v[0:3], s[0:1] sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
-; SDAG-NEXT:    global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1
+; SDAG-NEXT:    global_store_dwordx4 v36, v[4:7], s[0:1] offset:16 sc0 sc1
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-NEXT:    s_endpgm
 ;
@@ -1934,13 +1930,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half>
 ; HEURRC-NEXT:    s_load_dwordx8 s[24:31], s[4:5], 0x24
 ; HEURRC-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
 ; HEURRC-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; HEURRC-NEXT:    v_mov_b32_e32 v44, 0
+; HEURRC-NEXT:    v_mov_b32_e32 v36, 0
 ; HEURRC-NEXT:    s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT:    v_mov_b64_e32 v[34:35], s[26:27]
-; HEURRC-NEXT:    v_mov_b64_e32 v[32:33], s[24:25]
-; HEURRC-NEXT:    v_mov_b64_e32 v[38:39], s[30:31]
+; HEURRC-NEXT:    v_mov_b64_e32 v[40:41], s[26:27]
+; HEURRC-NEXT:    v_mov_b64_e32 v[38:39], s[24:25]
+; HEURRC-NEXT:    v_mov_b64_e32 v[44:45], s[30:31]
 ; HEURRC-NEXT:    v_mov_b64_e32 v[30:31], s[22:23]
-; HEURRC-NEXT:    v_mov_b64_e32 v[36:37], s[28:29]
+; HEURRC-NEXT:    v_mov_b64_e32 v[42:43], s[28:29]
 ; HEURRC-NEXT:    v_mov_b64_e32 v[28:29], s[20:21]
 ; HEURRC-NEXT:    v_mov_b64_e32 v[26:27], s[18:19]
 ; HEURRC-NEXT:    v_mov_b64_e32 v[24:25], s[16:17]
@@ -1948,41 +1944,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half>
 ; HEURRC-NEXT:    v_mov_b64_e32 v[20:21], s[12:13]
 ; HEURRC-NEXT:    v_mov_b64_e32 v[18:19], s[10:11]
 ; HEURRC-NEXT:    v_mov_b64_e32 v[16:17], s[8:9]
-; HEURRC-NEXT:    v_mov_b32_e32 v40, s20
-; HEURRC-NEXT:    v_mov_b32_e32 v41, s21
-; HEURRC-NEXT:    v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3
-; HEURRC-NEXT:    v_mov_b32_e32 v42, s22
-; HEURRC-NEXT:    v_mov_b32_e32 v43, s23
-; HEURRC-NEXT:    global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1
+; HEURRC-NEXT:    v_mov_b32_e32 v32, s20
+; HEURRC-NEXT:    v_mov_b32_e32 v33, s21
+; HEURRC-NEXT:    v_mfma_f32_32x32x16_f16 v[0:15], v[38:41], v[42:45], v[16:31] cbsz:1 abid:2 blgp:3
+; HEURRC-NEXT:    v_mov_b32_e32 v34, s22
+; HEURRC-NEXT:    v_mov_b32_e32 v35, s23
+; HEURRC-NEXT:    global_store_dwordx4 v36, v[32:35], s[0:1] offset:48 sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
 ; HEURRC-NEXT:    s_nop 2
 ; HEURRC-NEXT:    v_mov_b32_e32 v16, s16
 ; HEURRC-NEXT:    v_mov_b32_e32 v17, s17
 ; HEURRC-NEXT:    v_mov_b32_e32 v18, s18
 ; HEURRC-NEXT:    v_mov_b32_e32 v19, s19
-; HEURRC-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] offset:32 sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
 ; HEURRC-NEXT:    s_nop 0
 ; HEURRC-NEXT:    v_mov_b32_e32 v16, s12
 ; HEURRC-NEXT:    v_mov_b32_e32 v17, s13
 ; HEURRC-NEXT:    v_mov_b32_e32 v18, s14
 ; HEURRC-NEXT:    v_mov_b32_e32 v19, s15
-; HEURRC-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] offset:16 sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
 ; HEURRC-NEXT:    s_nop 0
 ; HEURRC-NEXT:    v_mov_b32_e32 v16, s8
 ; HEURRC-NEXT:    v_mov_b32_e32 v17, s9
 ; HEURRC-NEXT:    v_mov_b32_e32 v18, s10
 ; HEURRC-NEXT:    v_mov_b32_e32 v19, s11
-; HEURRC-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v36, v[8:11], s[0:1] offset:32 sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v36, v[12:15], s[0:1] offset:48 sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v36, v[0:3], s[0:1] sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
-; HEURRC-NEXT:    global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1
+; HEURRC-NEXT:    global_store_dwordx4 v36, v[4:7], s[0:1] offset:16 sc0 sc1
 ; HEURRC-NEXT:    s_waitcnt vmcnt(0)
 ; HEURRC-NEXT:    s_endpgm
 ;
@@ -1991,13 +1987,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half>
 ; VGPRRC-NEXT:    s_load_dwordx8 s[24:31], s[4:5], 0x24
 ; VGPRRC-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
 ; VGPRRC-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; VGPRRC-NEXT:    v_mov_b32_e32 v44, 0
+; VGPRRC-NEXT:    v_mov_b32_e32 v36, 0
 ; VGPRRC-NEXT:    s_waitcnt lgkmcnt(0)
-; VGPRRC-NEXT:    v_mov_b64_e32 v[34:35], s[26:27]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[32:33], s[24:25]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[38:39], s[30:31]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[40:41], s[26:27]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[38:39], s[24:25]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[44:45], s[30:31]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[30:31], s[22:23]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[36:37], s[28:29]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[42:43], s[28:29]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[28:29], s[20:21]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[26:27], s[18:19]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[24:25], s[16:17]
@@ -2005,41 +2001,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half>
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[20:21], s[12:13]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[18:19], s[10:11]
 ; VGPRRC-NEXT:    v_mov_b64_e32 v[16:17], s[8:9]
-; VGPRRC-NEXT:    v_mov_b32_e32 v40, s20
-; VGPRRC-NEXT:    v_mov_b32_e32 v41, s21
-; VGPRRC-NEXT:    v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3
-; VGPRRC-NEXT:    v_mov_b32_e32 v42, s22
-; VGPRRC-NEXT:    v_mov_b32_e32 v43, s23
-; VGPRRC-NEXT:    global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1
+; VGPRRC-NEXT:    v_mov_b32_e32 v32, s20
+; VGPRRC-NEXT:    v_mov_b32_e32 v33, s21
+; VGPRRC-NEXT:    v_mfma_f32_32x32x16_f16 v[0:15], v[38:41], v[42:45], v[16:31] cbsz:1 abid:2 blgp:3
+; VGPRRC-NEXT:    v_mov_b32_e32 v34, s22
+; VGPRRC-NEXT:    v_mov_b32_e32 v35, s23
+; VGPRRC-NEXT:    global_store_dwordx4 v36, v[32:35], s[0:1] offset:48 sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
 ; VGPRRC-NEXT:    s_nop 2
 ; VGPRRC-NEXT:    v_mov_b32_e32 v16, s16
 ; VGPRRC-NEXT:    v_mov_b32_e32 v17, s17
 ; VGPRRC-NEXT:    v_mov_b32_e32 v18, s18
 ; VGPRRC-NEXT:    v_mov_b32_e32 v19, s19
-; VGPRRC-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] offset:32 sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
 ; VGPRRC-NEXT:    s_nop 0
 ; VGPRRC-NEXT:    v_mov_b32_e32 v16, s12
 ; VGPRRC-NEXT:    v_mov_b32_e32 v17, s13
 ; VGPRRC-NEXT:    v_mov_b32_e32 v18, s14
 ; VGPRRC-NEXT:    v_mov_b32_e32 v19, s15
-; VGPRRC-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] offset:16 sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
 ; VGPRRC-NEXT:    s_nop 0
 ; VGPRRC-NEXT:    v_mov_b32_e32 v16, s8
 ; VGPRRC-NEXT:    v_mov_b32_e32 v17, s9
 ; VGPRRC-NEXT:    v_mov_b32_e32 v18, s10
 ; VGPRRC-NEXT:    v_mov_b32_e32 v19, s11
-; VGPRRC-NEXT:    global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v36, v[16:19], s[0:1] sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
-; VGPRRC-NEXT:    global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v36, v[8:11], s[0:1] offset:32 sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
-; VGPRRC-NEXT:    global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v36, v[12:15], s[0:1] offset:48 sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
-; VGPRRC-NEXT:    global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v36, v[0:3], s[0:1] sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
-; VGPRRC-NEXT:    global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1
+; VGPRRC-NEXT:    global_store_dwordx4 v36, v[4:7], s[0:1] offset:16 sc0 sc1
 ; VGPRRC-NEXT:    s_waitcnt vmcnt(0)
 ; VGPRRC-NEXT:    s_endpgm
 ; AGPR-LABEL: test_mfma_f32_32x32x16_f16__vgprcd__flags:
@@ -5425,18 +5421,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd(ptr addrs
 ; GCN-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x54
 ; GCN-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
-; GCN-NEXT:    v_mov_b32_e32 v12, 0
+; GCN-NEXT:    v_mov_b32_e32 v4, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
-; GCN-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
-; GCN-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
-; GCN-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; GCN-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
-; GCN-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; GCN-NEXT:    v_mov_b64_e32 v[6:7], s[8:9]
+; GCN-NEXT:    v_mov_b64_e32 v[8:9], s[10:11]
+; GCN-NEXT:    v_mov_b64_e32 v[10:11], s[12:13]
+; GCN-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GCN-NEXT:    v_mov_b64_e32 v[12:13], s[14:15]
+; GCN-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11]
+; GCN-NEXT:    v_mfma_f32_16x16x32_bf16 v[0:3], v[6:9], v[10:13], v[0:3]
 ; GCN-NEXT:    s_nop 7
-; GCN-NEXT:    global_store_dwordx4 v12, v[0:3], s[6:7]
+; GCN-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; GCN-NEXT:    s_endpgm
 ;
 ; HEURRC-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd:
@@ -5444,18 +5440,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd(ptr addrs
 ; HEURRC-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; HEURRC-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x54
 ; HEURRC-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
-; HEURRC-NEXT:    v_mov_b32_e32 v12, 0
+; HEURRC-NEXT:    v_mov_b32_e32 v4, 0
 ; HEURRC-NEXT:    s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
-; HEURRC-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
-; HEURRC-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
-; HEURRC-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; HEURRC-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
-; HEURRC-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; HEURRC-NEXT:    v_mov_b64_e32 v[6:7], s[8:9]
+; HEURRC-NEXT:    v_mov_b64_e32 v[8:9], s[10:11]
+; HEURRC-NEXT:    v_mov_b64_e32 v[10:11], s[12:13]
+; HEURRC-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; HEURRC-NEXT:    v_mov_b64_e32 v[12:13], s[14:15]
+; HEURRC-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
 ; HEURRC-NEXT:    s_nop 1
-; HEURRC-NEXT:    v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11]
+; HEURRC-NEXT:    v_mfma_f32_16x16x32_bf16 v[0:3], v[6:9], v[10:13], v[0:3]
 ; HEURRC-NEXT:    s_nop 7
-; HEURRC-NEXT:    global_store_dwordx4 v12, v[0:3], s[6:7]
+; HEURRC-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; HEURRC-NEXT:    s_endpgm
 ;
 ; VGPRRC-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd:
@@ -5463,18 +5459,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd(ptr addrs
 ; VGPRRC-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; VGPRRC-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x54
 ; VGPRRC-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
-; VGPRRC-NEXT:    v_mov_b32_e32 v12, 0
+; VGPRRC-NEXT:    v_mov_b32_e32 v4, 0
 ; VGPRRC-NEXT:    s_waitcnt lgkmcnt(0)
-; VGPRRC-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[6:7], s[8:9]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[8:9], s[10:11]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[10:11], s[12:13]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[12:13], s[14:15]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
 ; VGPRRC-NEXT:    s_nop 1
-; VGPRRC-NEXT:    v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11]
+; VGPRRC-NEXT:    v_mfma_f32_16x16x32_bf16 v[0:3], v[6:9], v[10:13], v[0:3]
 ; VGPRRC-NEXT:    s_nop 7
-; VGPRRC-NEXT:    global_store_dwordx4 v12, v[0:3], s[6:7]
+; VGPRRC-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; VGPRRC-NEXT:    s_endpgm
 ; AGPR-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd:
 ; AGPR:       ; %bb.0:
@@ -5525,18 +5521,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags(pt
 ; GCN-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x54
 ; GCN-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
-; GCN-NEXT:    v_mov_b32_e32 v12, 0
+; GCN-NEXT:    v_mov_b32_e32 v4, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
-; GCN-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
-; GCN-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
-; GCN-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; GCN-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
-; GCN-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; GCN-NEXT:    v_mov_b64_e32 v[6:7], s[8:9]
+; GCN-NEXT:    v_mov_b64_e32 v[8:9], s[10:11]
+; GCN-NEXT:    v_mov_b64_e32 v[10:11], s[12:13]
+; GCN-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; GCN-NEXT:    v_mov_b64_e32 v[12:13], s[14:15]
+; GCN-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1
+; GCN-NEXT:    v_mfma_f32_16x16x32_bf16 v[0:3], v[6:9], v[10:13], v[0:3] cbsz:3 abid:2 blgp:1
 ; GCN-NEXT:    s_nop 7
-; GCN-NEXT:    global_store_dwordx4 v12, v[0:3], s[6:7]
+; GCN-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; GCN-NEXT:    s_endpgm
 ;
 ; HEURRC-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags:
@@ -5544,18 +5540,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags(pt
 ; HEURRC-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; HEURRC-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x54
 ; HEURRC-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
-; HEURRC-NEXT:    v_mov_b32_e32 v12, 0
+; HEURRC-NEXT:    v_mov_b32_e32 v4, 0
 ; HEURRC-NEXT:    s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
-; HEURRC-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
-; HEURRC-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
-; HEURRC-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; HEURRC-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
-; HEURRC-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; HEURRC-NEXT:    v_mov_b64_e32 v[6:7], s[8:9]
+; HEURRC-NEXT:    v_mov_b64_e32 v[8:9], s[10:11]
+; HEURRC-NEXT:    v_mov_b64_e32 v[10:11], s[12:13]
+; HEURRC-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; HEURRC-NEXT:    v_mov_b64_e32 v[12:13], s[14:15]
+; HEURRC-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
 ; HEURRC-NEXT:    s_nop 1
-; HEURRC-NEXT:    v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1
+; HEURRC-NEXT:    v_mfma_f32_16x16x32_bf16 v[0:3], v[6:9], v[10:13], v[0:3] cbsz:3 abid:2 blgp:1
 ; HEURRC-NEXT:    s_nop 7
-; HEURRC-NEXT:    global_store_dwordx4 v12, v[0:3], s[6:7]
+; HEURRC-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; HEURRC-NEXT:    s_endpgm
 ;
 ; VGPRRC-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags:
@@ -5563,18 +5559,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags(pt
 ; VGPRRC-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x34
 ; VGPRRC-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x54
 ; VGPRRC-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
-; VGPRRC-NEXT:    v_mov_b32_e32 v12, 0
+; VGPRRC-NEXT:    v_mov_b32_e32 v4, 0
 ; VGPRRC-NEXT:    s_waitcnt lgkmcnt(0)
-; VGPRRC-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[10:11], s[2:3]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
-; VGPRRC-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[6:7], s[8:9]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[8:9], s[10:11]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[10:11], s[12:13]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[12:13], s[14:15]
+; VGPRRC-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
 ; VGPRRC-NEXT:    s_nop 1
-; VGPRRC-NEXT:    v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1
+; VGPRRC-NEXT:    v_mfma_f32_16x16x32_bf16 v[0:3], v[6:9], v[10:13], v[0:3] cbsz:3 abid:2 blgp:1
 ; VGPRRC-NEXT:    s_nop 7
-; VGPRRC-NEXT:    global_store_dwordx4 v12, v[0:3], s[6:7]
+; VGPRRC-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; VGPRRC-NEXT:    s_endpgm
 ; AGPR-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags:
 ; AGPR:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll
index 1e44a09..dbea832 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll
@@ -15,7 +15,7 @@ define amdgpu_kernel void @rcp_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
 ; SDAG-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
 ; SDAG-TRUE16-NEXT:    v_rcp_bf16_e32 v0.l, s2
-; SDAG-TRUE16-NEXT:    flat_store_b16 v1, v0, s[0:1]
+; SDAG-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1]
 ; SDAG-TRUE16-NEXT:    s_endpgm
 ;
 ; SDAG-FAKE16-LABEL: rcp_bf16:
@@ -35,10 +35,10 @@ define amdgpu_kernel void @rcp_bf16_constant_4(ptr addrspace(1) %out) #1 {
 ; SDAG-TRUE16-LABEL: rcp_bf16_constant_4:
 ; SDAG-TRUE16:       ; %bb.0:
 ; SDAG-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
-; SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0x3e80
 ; SDAG-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0x3e80
 ; SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
-; SDAG-TRUE16-NEXT:    flat_store_b16 v1, v0, s[0:1]
+; SDAG-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1]
 ; SDAG-TRUE16-NEXT:    s_endpgm
 ;
 ; SDAG-FAKE16-LABEL: rcp_bf16_constant_4:
@@ -57,10 +57,10 @@ define amdgpu_kernel void @rcp_bf16_constant_100(ptr addrspace(1) %out) #1 {
 ; SDAG-TRUE16-LABEL: rcp_bf16_constant_100:
 ; SDAG-TRUE16:       ; %bb.0:
 ; SDAG-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
-; SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0x3c24
 ; SDAG-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0x3c24
 ; SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
-; SDAG-TRUE16-NEXT:    flat_store_b16 v1, v0, s[0:1]
+; SDAG-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1]
 ; SDAG-TRUE16-NEXT:    s_endpgm
 ;
 ; SDAG-FAKE16-LABEL: rcp_bf16_constant_100:
@@ -79,10 +79,10 @@ define amdgpu_kernel void @rcp_undef_bf16(ptr addrspace(1) %out) #1 {
 ; SDAG-TRUE16-LABEL: rcp_undef_bf16:
 ; SDAG-TRUE16:       ; %bb.0:
 ; SDAG-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
-; SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0x7fc0
 ; SDAG-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0x7fc0
 ; SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
-; SDAG-TRUE16-NEXT:    flat_store_b16 v1, v0, s[0:1]
+; SDAG-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1]
 ; SDAG-TRUE16-NEXT:    s_endpgm
 ;
 ; SDAG-FAKE16-LABEL: rcp_undef_bf16:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.bf16.ll
index 42d12fd..662dc613 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.bf16.ll
@@ -15,7 +15,7 @@ define amdgpu_kernel void @rsq_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
 ; SDAG-REAL16-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-REAL16-NEXT:    s_wait_kmcnt 0x0
 ; SDAG-REAL16-NEXT:    v_rsq_bf16_e32 v0.l, s2
-; SDAG-REAL16-NEXT:    flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT:    global_store_b16 v1, v0, s[0:1]
 ; SDAG-REAL16-NEXT:    s_endpgm
 ;
 ; SDAG-FAKE16-LABEL: rsq_bf16:
@@ -38,7 +38,7 @@ define amdgpu_kernel void @rsq_bf16_constant_4(ptr addrspace(1) %out) #1 {
 ; SDAG-REAL16-NEXT:    v_rsq_bf16_e32 v0.l, 4.0
 ; SDAG-REAL16-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-REAL16-NEXT:    s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT:    flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT:    global_store_b16 v1, v0, s[0:1]
 ; SDAG-REAL16-NEXT:    s_endpgm
 ;
 ; SDAG-FAKE16-LABEL: rsq_bf16_constant_4:
@@ -61,7 +61,7 @@ define amdgpu_kernel void @rsq_bf16_constant_100(ptr addrspace(1) %out) #1 {
 ; SDAG-REAL16-NEXT:    v_rsq_bf16_e32 v0.l, 0x42c8
 ; SDAG-REAL16-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-REAL16-NEXT:    s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT:    flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT:    global_store_b16 v1, v0, s[0:1]
 ; SDAG-REAL16-NEXT:    s_endpgm
 ;
 ; SDAG-FAKE16-LABEL: rsq_bf16_constant_100:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
index 6eb9449..ee11b92 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
@@ -17,24 +17,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_f16__vgpr(ptr addrspace(1) %
 ; SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
 ; SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; SDAG-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
-; SDAG-NEXT:    v_mov_b32_e32 v16, 0
+; SDAG-NEXT:    v_mov_b32_e32 v4, 0
 ; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-NEXT:    global_load_dwordx4 v[8:11], v0, s[6:7]
+; SDAG-NEXT:    global_load_dwordx4 v[0:3], v0, s[6:7]
 ; SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x44
 ; SDAG-NEXT:    s_load_dword s16, s[4:5], 0x64
-; SDAG-NEXT:    v_mov_b64_e32 v[14:15], s[2:3]
-; SDAG-NEXT:    v_mov_b64_e32 v[12:13], s[0:1]
+; SDAG-NEXT:    v_mov_b64_e32 v[16:17], s[2:3]
+; SDAG-NEXT:    v_mov_b64_e32 v[14:15], s[0:1]
 ; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
-; SDAG-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
-; SDAG-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
-; SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
-; SDAG-NEXT:    v_mov_b32_e32 v17, s16
+; SDAG-NEXT:    v_mov_b64_e32 v[6:7], s[8:9]
+; SDAG-NEXT:    v_mov_b64_e32 v[8:9], s[10:11]
+; SDAG-NEXT:    v_mov_b64_e32 v[10:11], s[12:13]
+; SDAG-NEXT:    v_mov_b64_e32 v[12:13], s[14:15]
+; SDAG-NEXT:    v_mov_b32_e32 v5, s16
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-NEXT:    s_nop 0
-; SDAG-NEXT:    v_smfmac_f32_16x16x64_f16 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
+; SDAG-NEXT:    v_smfmac_f32_16x16x64_f16 v[0:3], v[14:17], v[6:13], v5 cbsz:1 abid:2
 ; SDAG-NEXT:    s_nop 7
-; SDAG-NEXT:    global_store_dwordx4 v16, v[8:11], s[6:7]
+; SDAG-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; SDAG-NEXT:    s_endpgm
 ;
 ; GISEL-LABEL: test_smfmac_f32_16x16x64_f16__vgpr:
@@ -120,30 +120,25 @@ define <4 x float> @test_smfmac_f32_16x16x64_f16__sgpr(<8 x half> inreg %arg0, <
 ; SDAG-LABEL: test_smfmac_f32_16x16x64_f16__sgpr:
 ; SDAG:       ; %bb.0:
 ; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b32_e32 v10, s0
-; SDAG-NEXT:    v_mov_b32_e32 v11, s1
-; SDAG-NEXT:    v_mov_b32_e32 v12, s2
-; SDAG-NEXT:    v_mov_b32_e32 v13, s3
-; SDAG-NEXT:    v_mov_b32_e32 v2, s16
-; SDAG-NEXT:    v_mov_b32_e32 v3, s17
-; SDAG-NEXT:    v_mov_b32_e32 v4, s18
-; SDAG-NEXT:    v_mov_b32_e32 v5, s19
-; SDAG-NEXT:    v_mov_b32_e32 v6, s20
-; SDAG-NEXT:    v_mov_b32_e32 v7, s21
-; SDAG-NEXT:    v_mov_b32_e32 v8, s22
-; SDAG-NEXT:    v_mov_b32_e32 v9, s23
-; SDAG-NEXT:    v_accvgpr_write_b32 a0, s24
-; SDAG-NEXT:    v_accvgpr_write_b32 a1, s25
-; SDAG-NEXT:    v_accvgpr_write_b32 a2, s26
-; SDAG-NEXT:    v_accvgpr_write_b32 a3, s27
-; SDAG-NEXT:    v_mov_b32_e32 v0, s28
+; SDAG-NEXT:    v_mov_b32_e32 v14, s0
+; SDAG-NEXT:    v_mov_b32_e32 v15, s1
+; SDAG-NEXT:    v_mov_b32_e32 v16, s2
+; SDAG-NEXT:    v_mov_b32_e32 v17, s3
+; SDAG-NEXT:    v_mov_b32_e32 v6, s16
+; SDAG-NEXT:    v_mov_b32_e32 v7, s17
+; SDAG-NEXT:    v_mov_b32_e32 v8, s18
+; SDAG-NEXT:    v_mov_b32_e32 v9, s19
+; SDAG-NEXT:    v_mov_b32_e32 v10, s20
+; SDAG-NEXT:    v_mov_b32_e32 v11, s21
+; SDAG-NEXT:    v_mov_b32_e32 v12, s22
+; SDAG-NEXT:    v_mov_b32_e32 v13, s23
+; SDAG-NEXT:    v_mov_b32_e32 v0, s24
+; SDAG-NEXT:    v_mov_b32_e32 v1, s25
+; SDAG-NEXT:    v_mov_b32_e32 v2, s26
+; SDAG-NEXT:    v_mov_b32_e32 v3, s27
+; SDAG-NEXT:    v_mov_b32_e32 v4, s28
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_smfmac_f32_16x16x64_f16 a[0:3], v[10:13], v[2:9], v0
-; SDAG-NEXT:    s_nop 7
-; SDAG-NEXT:    v_accvgpr_read_b32 v0, a0
-; SDAG-NEXT:    v_accvgpr_read_b32 v1, a1
-; SDAG-NEXT:    v_accvgpr_read_b32 v2, a2
-; SDAG-NEXT:    v_accvgpr_read_b32 v3, a3
+; SDAG-NEXT:    v_smfmac_f32_16x16x64_f16 v[0:3], v[14:17], v[6:13], v4
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: test_smfmac_f32_16x16x64_f16__sgpr:
@@ -187,17 +182,17 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x32_f16__vgpr(ptr addrspace(1) %
 ; SDAG-NEXT:    global_load_dwordx4 v[0:3], v16, s[6:7]
 ; SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x44
 ; SDAG-NEXT:    s_load_dword s16, s[4:5], 0x64
-; SDAG-NEXT:    v_mov_b64_e32 v[26:27], s[2:3]
-; SDAG-NEXT:    v_mov_b64_e32 v[24:25], s[0:1]
+; SDAG-NEXT:    v_mov_b64_e32 v[28:29], s[2:3]
+; SDAG-NEXT:    v_mov_b64_e32 v[26:27], s[0:1]
 ; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b64_e32 v[22:23], s[14:15]
-; SDAG-NEXT:    v_mov_b64_e32 v[20:21], s[12:13]
-; SDAG-NEXT:    v_mov_b64_e32 v[18:19], s[10:11]
-; SDAG-NEXT:    v_mov_b64_e32 v[16:17], s[8:9]
-; SDAG-NEXT:    v_mov_b32_e32 v28, s16
+; SDAG-NEXT:    v_mov_b64_e32 v[24:25], s[14:15]
+; SDAG-NEXT:    v_mov_b64_e32 v[22:23], s[12:13]
+; SDAG-NEXT:    v_mov_b64_e32 v[20:21], s[10:11]
+; SDAG-NEXT:    v_mov_b64_e32 v[18:19], s[8:9]
+; SDAG-NEXT:    v_mov_b32_e32 v16, s16
 ; SDAG-NEXT:    s_waitcnt vmcnt(0)
 ; SDAG-NEXT:    s_nop 0
-; SDAG-NEXT:    v_smfmac_f32_32x32x32_f16 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
+; SDAG-NEXT:    v_smfmac_f32_32x32x32_f16 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2
 ; SDAG-NEXT:    v_mov_b32_e32 v16, 0
 ; SDAG-NEXT:    s_nop 10
 ; SDAG-NEXT:    global_store_dwordx4 v16, v[8:11], s[6:7] offset:32
@@ -436,53 +431,37 @@ define <16 x float> @test_smfmac_f32_32x32x32_f16__sgpr(<8 x half> inreg %arg0,
 ; SDAG-LABEL: test_smfmac_f32_32x32x32_f16__sgpr:
 ; SDAG:       ; %bb.0:
 ; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b32_e32 v36, s0
-; SDAG-NEXT:    v_mov_b32_e32 v37, s1
-; SDAG-NEXT:    v_mov_b32_e32 v38, s2
-; SDAG-NEXT:    v_mov_b32_e32 v39, s3
-; SDAG-NEXT:    v_mov_b32_e32 v13, s25
-; SDAG-NEXT:    v_mov_b32_e32 v14, s26
-; SDAG-NEXT:    v_mov_b32_e32 v15, s27
-; SDAG-NEXT:    v_mov_b32_e32 v16, s28
-; SDAG-NEXT:    v_mov_b32_e32 v17, s29
-; SDAG-NEXT:    v_mov_b32_e32 v28, s16
-; SDAG-NEXT:    v_mov_b32_e32 v29, s17
-; SDAG-NEXT:    v_mov_b32_e32 v30, s18
-; SDAG-NEXT:    v_mov_b32_e32 v31, s19
-; SDAG-NEXT:    v_mov_b32_e32 v32, s20
-; SDAG-NEXT:    v_mov_b32_e32 v33, s21
-; SDAG-NEXT:    v_mov_b32_e32 v34, s22
-; SDAG-NEXT:    v_mov_b32_e32 v35, s23
-; SDAG-NEXT:    v_mov_b32_e32 v12, s24
-; SDAG-NEXT:    v_mov_b32_e32 v18, v0
-; SDAG-NEXT:    v_mov_b32_e32 v19, v1
-; SDAG-NEXT:    v_mov_b32_e32 v20, v2
-; SDAG-NEXT:    v_mov_b32_e32 v21, v3
-; SDAG-NEXT:    v_mov_b32_e32 v22, v4
-; SDAG-NEXT:    v_mov_b32_e32 v23, v5
-; SDAG-NEXT:    v_mov_b32_e32 v24, v6
-; SDAG-NEXT:    v_mov_b32_e32 v25, v7
-; SDAG-NEXT:    v_mov_b32_e32 v26, v8
-; SDAG-NEXT:    v_mov_b32_e32 v27, v9
+; SDAG-NEXT:    v_mov_b32_e32 v26, s0
+; SDAG-NEXT:    v_mov_b32_e32 v27, s1
+; SDAG-NEXT:    v_mov_b32_e32 v28, s2
+; SDAG-NEXT:    v_mov_b32_e32 v29, s3
+; SDAG-NEXT:    v_mov_b32_e32 v16, v10
+; SDAG-NEXT:    v_mov_b32_e32 v15, v9
+; SDAG-NEXT:    v_mov_b32_e32 v14, v8
+; SDAG-NEXT:    v_mov_b32_e32 v13, v7
+; SDAG-NEXT:    v_mov_b32_e32 v12, v6
+; SDAG-NEXT:    v_mov_b32_e32 v11, v5
+; SDAG-NEXT:    v_mov_b32_e32 v10, v4
+; SDAG-NEXT:    v_mov_b32_e32 v9, v3
+; SDAG-NEXT:    v_mov_b32_e32 v8, v2
+; SDAG-NEXT:    v_mov_b32_e32 v7, v1
+; SDAG-NEXT:    v_mov_b32_e32 v6, v0
+; SDAG-NEXT:    v_mov_b32_e32 v0, s24
+; SDAG-NEXT:    v_mov_b32_e32 v1, s25
+; SDAG-NEXT:    v_mov_b32_e32 v2, s26
+; SDAG-NEXT:    v_mov_b32_e32 v3, s27
+; SDAG-NEXT:    v_mov_b32_e32 v4, s28
+; SDAG-NEXT:    v_mov_b32_e32 v5, s29
+; SDAG-NEXT:    v_mov_b32_e32 v18, s16
+; SDAG-NEXT:    v_mov_b32_e32 v19, s17
+; SDAG-NEXT:    v_mov_b32_e32 v20, s18
+; SDAG-NEXT:    v_mov_b32_e32 v21, s19
+; SDAG-NEXT:    v_mov_b32_e32 v22, s20
+; SDAG-NEXT:    v_mov_b32_e32 v23, s21
+; SDAG-NEXT:    v_mov_b32_e32 v24, s22
+; SDAG-NEXT:    v_mov_b32_e32 v25, s23
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_smfmac_f32_32x32x32_f16 v[12:27], v[36:39], v[28:35], v10
-; SDAG-NEXT:    s_nop 11
-; SDAG-NEXT:    v_mov_b32_e32 v0, v12
-; SDAG-NEXT:    v_mov_b32_e32 v1, v13
-; SDAG-NEXT:    v_mov_b32_e32 v2, v14
-; SDAG-NEXT:    v_mov_b32_e32 v3, v15
-; SDAG-NEXT:    v_mov_b32_e32 v4, v16
-; SDAG-NEXT:    v_mov_b32_e32 v5, v17
-; SDAG-NEXT:    v_mov_b32_e32 v6, v18
-; SDAG-NEXT:    v_mov_b32_e32 v7, v19
-; SDAG-NEXT:    v_mov_b32_e32 v8, v20
-; SDAG-NEXT:    v_mov_b32_e32 v9, v21
-; SDAG-NEXT:    v_mov_b32_e32 v10, v22
-; SDAG-NEXT:    v_mov_b32_e32 v11, v23
-; SDAG-NEXT:    v_mov_b32_e32 v12, v24
-; SDAG-NEXT:    v_mov_b32_e32 v13, v25
-; SDAG-NEXT:    v_mov_b32_e32 v14, v26
-; SDAG-NEXT:    v_mov_b32_e32 v15, v27
+; SDAG-NEXT:    v_smfmac_f32_32x32x32_f16 v[0:15], v[26:29], v[18:25], v16
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: test_smfmac_f32_32x32x32_f16__sgpr:
@@ -541,24 +520,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_bf16__vgpr(ptr addrspace(1)
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
 ; GCN-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GCN-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
-; GCN-NEXT:    v_mov_b32_e32 v16, 0
+; GCN-NEXT:    v_mov_b32_e32 v4, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    global_load_dwordx4 v[8:11], v0, s[6:7]
+; GCN-NEXT:    global_load_dwordx4 v[0:3], v0, s[6:7]
 ; GCN-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x44
 ; GCN-NEXT:    s_load_dword s16, s[4:5], 0x64
-; GCN-NEXT:    v_mov_b64_e32 v[14:15], s[2:3]
-; GCN-NEXT:    v_mov_b64_e32 v[12:13], s[0:1]
+; GCN-NEXT:    v_mov_b64_e32 v[16:17], s[2:3]
+; GCN-NEXT:    v_mov_b64_e32 v[14:15], s[0:1]
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_mov_b64_e32 v[0:1], s[8:9]
-; GCN-NEXT:    v_mov_b64_e32 v[2:3], s[10:11]
-; GCN-NEXT:    v_mov_b64_e32 v[4:5], s[12:13]
-; GCN-NEXT:    v_mov_b64_e32 v[6:7], s[14:15]
-; GCN-NEXT:    v_mov_b32_e32 v17, s16
+; GCN-NEXT:    v_mov_b64_e32 v[6:7], s[8:9]
+; GCN-NEXT:    v_mov_b64_e32 v[8:9], s[10:11]
+; GCN-NEXT:    v_mov_b64_e32 v[10:11], s[12:13]
+; GCN-NEXT:    v_mov_b64_e32 v[12:13], s[14:15]
+; GCN-NEXT:    v_mov_b32_e32 v5, s16
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_nop 0
-; GCN-NEXT:    v_smfmac_f32_16x16x64_bf16 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
+; GCN-NEXT:    v_smfmac_f32_16x16x64_bf16 v[0:3], v[14:17], v[6:13], v5 cbsz:1 abid:2
 ; GCN-NEXT:    s_nop 7
-; GCN-NEXT:    global_store_dwordx4 v16, v[8:11], s[6:7]
+; GCN-NEXT:    global_store_dwordx4 v4, v[0:3], s[6:7]
 ; GCN-NEXT:    s_endpgm
 bb:
   %id = call i32 @llvm.amdgcn.workitem.id.x()
@@ -618,30 +597,25 @@ define <4 x float> @test_smfmac_f32_16x16x64_bf16__sgpr(<8 x bfloat> inreg %arg0
 ; GCN-LABEL: test_smfmac_f32_16x16x64_bf16__sgpr:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v10, s0
-; GCN-NEXT:    v_mov_b32_e32 v11, s1
-; GCN-NEXT:    v_mov_b32_e32 v12, s2
-; GCN-NEXT:    v_mov_b32_e32 v13, s3
-; GCN-NEXT:    v_mov_b32_e32 v2, s16
-; GCN-NEXT:    v_mov_b32_e32 v3, s17
-; GCN-NEXT:    v_mov_b32_e32 v4, s18
-; GCN-NEXT:    v_mov_b32_e32 v5, s19
-; GCN-NEXT:    v_mov_b32_e32 v6, s20
-; GCN-NEXT:    v_mov_b32_e32 v7, s21
-; GCN-NEXT:    v_mov_b32_e32 v8, s22
-; GCN-NEXT:    v_mov_b32_e32 v9, s23
-; GCN-NEXT:    v_accvgpr_write_b32 a0, s24
-; GCN-NEXT:    v_accvgpr_write_b32 a1, s25
-; GCN-NEXT:    v_accvgpr_write_b32 a2, s26
-; GCN-NEXT:    v_accvgpr_write_b32 a3, s27
-; GCN-NEXT:    v_mov_b32_e32 v0, s28
+; GCN-NEXT:    v_mov_b32_e32 v14, s0
+; GCN-NEXT:    v_mov_b32_e32 v15, s1
+; GCN-NEXT:    v_mov_b32_e32 v16, s2
+; GCN-NEXT:    v_mov_b32_e32 v17, s3
+; GCN-NEXT:    v_mov_b32_e32 v6, s16
+; GCN-NEXT:    v_mov_b32_e32 v7, s17
+; GCN-NEXT:    v_mov_b32_e32 v8, s18
+; GCN-NEXT:    v_mov_b32_e32 v9, s19
+; GCN-NEXT:    v_mov_b32_e32 v10, s20
+; GCN-NEXT:    v_mov_b32_e32 v11, s21
+; GCN-NEXT:    v_mov_b32_e32 v12, s22
+; GCN-NEXT:    v_mov_b32_e32 v13, s23
+; GCN-NEXT:    v_mov_b32_e32 v0, s24
+; GCN-NEXT:    v_mov_b32_e32 v1, s25
+; GCN-NEXT:    v_mov_b32_e32 v2, s26
+; GCN-NEXT:    v_mov_b32_e32 v3, s27
+; GCN-NEXT:    v_mov_b32_e32 v4, s28
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_smfmac_f32_16x16x64_bf16 a[0:3], v[10:13], v[2:9], v0
-; GCN-NEXT:    s_nop 7
-; GCN-NEXT:    v_accvgpr_read_b32 v0, a0
-; GCN-NEXT:    v_accvgpr_read_b32 v1, a1
-; GCN-NEXT:    v_accvgpr_read_b32 v2, a2
-; GCN-NEXT:    v_accvgpr_read_b32 v3, a3
+; GCN-NEXT:    v_smfmac_f32_16x16x64_bf16 v[0:3], v[14:17], v[6:13], v4
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %result = call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.bf16(<8 x bfloat> %arg0, <16 x bfloat> %arg1, <4 x float> %arg2, i32 %arg3, i32 immarg 0, i32 immarg 0)
   ret <4 x float> %result
@@ -667,17 +641,17 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x32_bf16__vgpr(ptr addrspace(1)
 ; GCN-NEXT:    global_load_dwordx4 v[0:3], v16, s[6:7]
 ; GCN-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x44
 ; GCN-NEXT:    s_load_dword s16, s[4:5], 0x64
-; GCN-NEXT:    v_mov_b64_e32 v[26:27], s[2:3]
-; GCN-NEXT:    v_mov_b64_e32 v[24:25], s[0:1]
+; GCN-NEXT:    v_mov_b64_e32 v[28:29], s[2:3]
+; GCN-NEXT:    v_mov_b64_e32 v[26:27], s[0:1]
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_mov_b64_e32 v[22:23], s[14:15]
-; GCN-NEXT:    v_mov_b64_e32 v[20:21], s[12:13]
-; GCN-NEXT:    v_mov_b64_e32 v[18:19], s[10:11]
-; GCN-NEXT:    v_mov_b64_e32 v[16:17], s[8:9]
-; GCN-NEXT:    v_mov_b32_e32 v28, s16
+; GCN-NEXT:    v_mov_b64_e32 v[24:25], s[14:15]
+; GCN-NEXT:    v_mov_b64_e32 v[22:23], s[12:13]
+; GCN-NEXT:    v_mov_b64_e32 v[20:21], s[10:11]
+; GCN-NEXT:    v_mov_b64_e32 v[18:19], s[8:9]
+; GCN-NEXT:    v_mov_b32_e32 v16, s16
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_nop 0
-; GCN-NEXT:    v_smfmac_f32_32x32x32_bf16 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
+; GCN-NEXT:    v_smfmac_f32_32x32x32_bf16 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2
 ; GCN-NEXT:    v_mov_b32_e32 v16, 0
 ; GCN-NEXT:    s_nop 10
 ; GCN-NEXT:    global_store_dwordx4 v16, v[8:11], s[6:7] offset:32
@@ -779,53 +753,37 @@ define <16 x float> @test_smfmac_f32_32x32x32_bf16__sgpr(<8 x bfloat> inreg %arg
 ; GCN-LABEL: test_smfmac_f32_32x32x32_bf16__sgpr:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v36, s0
-; GCN-NEXT:    v_mov_b32_e32 v37, s1
-; GCN-NEXT:    v_mov_b32_e32 v38, s2
-; GCN-NEXT:    v_mov_b32_e32 v39, s3
-; GCN-NEXT:    v_mov_b32_e32 v13, s25
-; GCN-NEXT:    v_mov_b32_e32 v14, s26
-; GCN-NEXT:    v_mov_b32_e32 v15, s27
-; GCN-NEXT:    v_mov_b32_e32 v16, s28
-; GCN-NEXT:    v_mov_b32_e32 v17, s29
-; GCN-NEXT:    v_mov_b32_e32 v28, s16
-; GCN-NEXT:    v_mov_b32_e32 v29, s17
-; GCN-NEXT:    v_mov_b32_e32 v30, s18
-; GCN-NEXT:    v_mov_b32_e32 v31, s19
-; GCN-NEXT:    v_mov_b32_e32 v32, s20
-; GCN-NEXT:    v_mov_b32_e32 v33, s21
-; GCN-NEXT:    v_mov_b32_e32 v34, s22
-; GCN-NEXT:    v_mov_b32_e32 v35, s23
-; GCN-NEXT:    v_mov_b32_e32 v12, s24
-; GCN-NEXT:    v_mov_b32_e32 v18, v0
-; GCN-NEXT:    v_mov_b32_e32 v19, v1
-; GCN-NEXT:    v_mov_b32_e32 v20, v2
-; GCN-NEXT:    v_mov_b32_e32 v21, v3
-; GCN-NEXT:    v_mov_b32_e32 v22, v4
-; GCN-NEXT:    v_mov_b32_e32 v23, v5
-; GCN-NEXT:    v_mov_b32_e32 v24, v6
-; GCN-NEXT:    v_mov_b32_e32 v25, v7
-; GCN-NEXT:    v_mov_b32_e32 v26, v8
-; GCN-NEXT:    v_mov_b32_e32 v27, v9
+; GCN-NEXT:    v_mov_b32_e32 v26, s0
+; GCN-NEXT:    v_mov_b32_e32 v27, s1
+; GCN-NEXT:    v_mov_b32_e32 v28, s2
+; GCN-NEXT:    v_mov_b32_e32 v29, s3
+; GCN-NEXT:    v_mov_b32_e32 v16, v10
+; GCN-NEXT:    v_mov_b32_e32 v15, v9
+; GCN-NEXT:    v_mov_b32_e32 v14, v8
+; GCN-NEXT:    v_mov_b32_e32 v13, v7
+; GCN-NEXT:    v_mov_b32_e32 v12, v6
+; GCN-NEXT:    v_mov_b32_e32 v11, v5
+; GCN-NEXT:    v_mov_b32_e32 v10, v4
+; GCN-NEXT:    v_mov_b32_e32 v9, v3
+; GCN-NEXT:    v_mov_b32_e32 v8, v2
+; GCN-NEXT:    v_mov_b32_e32 v7, v1
+; GCN-NEXT:    v_mov_b32_e32 v6, v0
+; GCN-NEXT:    v_mov_b32_e32 v0, s24
+; GCN-NEXT:    v_mov_b32_e32 v1, s25
+; GCN-NEXT:    v_mov_b32_e32 v2, s26
+; GCN-NEXT:    v_mov_b32_e32 v3, s27
+; GCN-NEXT:    v_mov_b32_e32 v4, s28
+; GCN-NEXT:    v_mov_b32_e32 v5, s29
+; GCN-NEXT:    v_mov_b32_e32 v18, s16
+; GCN-NEXT:    v_mov_b32_e32 v19, s17
+; GCN-NEXT:    v_mov_b32_e32 v20, s18
+; GCN-NEXT:    v_mov_b32_e32 v21, s19
+; GCN-NEXT:    v_mov_b32_e32 v22, s20
+; GCN-NEXT:    v_mov_b32_e32 v23, s21
+; GCN-NEXT:    v_mov_b32_e32 v24, s22
+; GCN-NEXT:    v_mov_b32_e32 v25, s23
 ; GCN-NEXT:    s_nop 1
-; GCN-NEXT:    v_smfmac_f32_32x32x32_bf16 v[12:27], v[36:39], v[28:35], v10
-; GCN-NEXT:    s_nop 11
-; GCN-NEXT:    v_mov_b32_e32 v0, v12
-; GCN-NEXT:    v_mov_b32_e32 v1, v13
-; GCN-NEXT:    v_mov_b32_e32 v2, v14
-; GCN-NEXT:    v_mov_b32_e32 v3, v15
-; GCN-NEXT:    v_mov_b32_e32 v4, v16
-; GCN-NEXT:    v_mov_b32_e32 v5, v17
-; GCN-NEXT:    v_mov_b32_e32 v6, v18
-; GCN-NEXT:    v_mov_b32_e32 v7, v19
-; GCN-NEXT:    v_mov_b32_e32 v8, v20
-; GCN-NEXT:    v_mov_b32_e32 v9, v21
-; GCN-NEXT:    v_mov_b32_e32 v10, v22
-; GCN-NEXT:    v_mov_b32_e32 v11, v23
-; GCN-NEXT:    v_mov_b32_e32 v12, v24
-; GCN-NEXT:    v_mov_b32_e32 v13, v25
-; GCN-NEXT:    v_mov_b32_e32 v14, v26
-; GCN-NEXT:    v_mov_b32_e32 v15, v27
+; GCN-NEXT:    v_smfmac_f32_32x32x32_bf16 v[0:15], v[26:29], v[18:25], v16
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %result = call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.bf16(<8 x bfloat> %arg0, <16 x bfloat> %arg1, <16 x float> %arg2, i32 %arg3, i32 immarg 0, i32 immarg 0)
   ret <16 x float> %result
@@ -953,30 +911,25 @@ define <4 x i32> @test_smfmac_i32_16x16x128_i8__sgpr(<4 x i32> inreg %arg0, <8 x
 ; SDAG-LABEL: test_smfmac_i32_16x16x128_i8__sgpr:
 ; SDAG:       ; %bb.0:
 ; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b32_e32 v10, s0
-; SDAG-NEXT:    v_mov_b32_e32 v11, s1
-; SDAG-NEXT:    v_mov_b32_e32 v12, s2
-; SDAG-NEXT:    v_mov_b32_e32 v13, s3
-; SDAG-NEXT:    v_mov_b32_e32 v2, s16
-; SDAG-NEXT:    v_mov_b32_e32 v3, s17
-; SDAG-NEXT:    v_mov_b32_e32 v4, s18
-; SDAG-NEXT:    v_mov_b32_e32 v5, s19
-; SDAG-NEXT:    v_mov_b32_e32 v6, s20
-; SDAG-NEXT:    v_mov_b32_e32 v7, s21
-; SDAG-NEXT:    v_mov_b32_e32 v8, s22
-; SDAG-NEXT:    v_mov_b32_e32 v9, s23
-; SDAG-NEXT:    v_accvgpr_write_b32 a0, s24
-; SDAG-NEXT:    v_accvgpr_write_b32 a1, s25
-; SDAG-NEXT:    v_accvgpr_write_b32 a2, s26
-; SDAG-NEXT:    v_accvgpr_write_b32 a3, s27
-; SDAG-NEXT:    v_mov_b32_e32 v0, s28
+; SDAG-NEXT:    v_mov_b32_e32 v14, s0
+; SDAG-NEXT:    v_mov_b32_e32 v15, s1
+; SDAG-NEXT:    v_mov_b32_e32 v16, s2
+; SDAG-NEXT:    v_mov_b32_e32 v17, s3
+; SDAG-NEXT:    v_mov_b32_e32 v6, s16
+; SDAG-NEXT:    v_mov_b32_e32 v7, s17
+; SDAG-NEXT:    v_mov_b32_e32 v8, s18
+; SDAG-NEXT:    v_mov_b32_e32 v9, s19
+; SDAG-NEXT:    v_mov_b32_e32 v10, s20
+; SDAG-NEXT:    v_mov_b32_e32 v11, s21
+; SDAG-NEXT:    v_mov_b32_e32 v12, s22
+; SDAG-NEXT:    v_mov_b32_e32 v13, s23
+; SDAG-NEXT:    v_mov_b32_e32 v0, s24
+; SDAG-NEXT:    v_mov_b32_e32 v1, s25
+; SDAG-NEXT:    v_mov_b32_e32 v2, s26
+; SDAG-NEXT:    v_mov_b32_e32 v3, s27
+; SDAG-NEXT:    v_mov_b32_e32 v4, s28
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_smfmac_i32_16x16x128_i8 a[0:3], v[10:13], v[2:9], v0
-; SDAG-NEXT:    s_nop 7
-; SDAG-NEXT:    v_accvgpr_read_b32 v0, a0
-; SDAG-NEXT:    v_accvgpr_read_b32 v1, a1
-; SDAG-NEXT:    v_accvgpr_read_b32 v2, a2
-; SDAG-NEXT:    v_accvgpr_read_b32 v3, a3
+; SDAG-NEXT:    v_smfmac_i32_16x16x128_i8 v[0:3], v[14:17], v[6:13], v4
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: test_smfmac_i32_16x16x128_i8__sgpr:
@@ -1275,53 +1228,37 @@ define <16 x i32> @test_smfmac_i32_32x32x64_i8__sgpr(<4 x i32> inreg %arg0, <8 x
 ; SDAG-LABEL: test_smfmac_i32_32x32x64_i8__sgpr:
 ; SDAG:       ; %bb.0:
 ; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b32_e32 v36, s0
-; SDAG-NEXT:    v_mov_b32_e32 v37, s1
-; SDAG-NEXT:    v_mov_b32_e32 v38, s2
-; SDAG-NEXT:    v_mov_b32_e32 v39, s3
-; SDAG-NEXT:    v_mov_b32_e32 v13, s25
-; SDAG-NEXT:    v_mov_b32_e32 v14, s26
-; SDAG-NEXT:    v_mov_b32_e32 v15, s27
-; SDAG-NEXT:    v_mov_b32_e32 v16, s28
-; SDAG-NEXT:    v_mov_b32_e32 v17, s29
-; SDAG-NEXT:    v_mov_b32_e32 v28, s16
-; SDAG-NEXT:    v_mov_b32_e32 v29, s17
-; SDAG-NEXT:    v_mov_b32_e32 v30, s18
-; SDAG-NEXT:    v_mov_b32_e32 v31, s19
-; SDAG-NEXT:    v_mov_b32_e32 v32, s20
-; SDAG-NEXT:    v_mov_b32_e32 v33, s21
-; SDAG-NEXT:    v_mov_b32_e32 v34, s22
-; SDAG-NEXT:    v_mov_b32_e32 v35, s23
-; SDAG-NEXT:    v_mov_b32_e32 v12, s24
-; SDAG-NEXT:    v_mov_b32_e32 v18, v0
-; SDAG-NEXT:    v_mov_b32_e32 v19, v1
-; SDAG-NEXT:    v_mov_b32_e32 v20, v2
-; SDAG-NEXT:    v_mov_b32_e32 v21, v3
-; SDAG-NEXT:    v_mov_b32_e32 v22, v4
-; SDAG-NEXT:    v_mov_b32_e32 v23, v5
-; SDAG-NEXT:    v_mov_b32_e32 v24, v6
-; SDAG-NEXT:    v_mov_b32_e32 v25, v7
-; SDAG-NEXT:    v_mov_b32_e32 v26, v8
-; SDAG-NEXT:    v_mov_b32_e32 v27, v9
+; SDAG-NEXT:    v_mov_b32_e32 v26, s0
+; SDAG-NEXT:    v_mov_b32_e32 v27, s1
+; SDAG-NEXT:    v_mov_b32_e32 v28, s2
+; SDAG-NEXT:    v_mov_b32_e32 v29, s3
+; SDAG-NEXT:    v_mov_b32_e32 v16, v10
+; SDAG-NEXT:    v_mov_b32_e32 v15, v9
+; SDAG-NEXT:    v_mov_b32_e32 v14, v8
+; SDAG-NEXT:    v_mov_b32_e32 v13, v7
+; SDAG-NEXT:    v_mov_b32_e32 v12, v6
+; SDAG-NEXT:    v_mov_b32_e32 v11, v5
+; SDAG-NEXT:    v_mov_b32_e32 v10, v4
+; SDAG-NEXT:    v_mov_b32_e32 v9, v3
+; SDAG-NEXT:    v_mov_b32_e32 v8, v2
+; SDAG-NEXT:    v_mov_b32_e32 v7, v1
+; SDAG-NEXT:    v_mov_b32_e32 v6, v0
+; SDAG-NEXT:    v_mov_b32_e32 v0, s24
+; SDAG-NEXT:    v_mov_b32_e32 v1, s25
+; SDAG-NEXT:    v_mov_b32_e32 v2, s26
+; SDAG-NEXT:    v_mov_b32_e32 v3, s27
+; SDAG-NEXT:    v_mov_b32_e32 v4, s28
+; SDAG-NEXT:    v_mov_b32_e32 v5, s29
+; SDAG-NEXT:    v_mov_b32_e32 v18, s16
+; SDAG-NEXT:    v_mov_b32_e32 v19, s17
+; SDAG-NEXT:    v_mov_b32_e32 v20, s18
+; SDAG-NEXT:    v_mov_b32_e32 v21, s19
+; SDAG-NEXT:    v_mov_b32_e32 v22, s20
+; SDAG-NEXT:    v_mov_b32_e32 v23, s21
+; SDAG-NEXT:    v_mov_b32_e32 v24, s22
+; SDAG-NEXT:    v_mov_b32_e32 v25, s23
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_smfmac_i32_32x32x64_i8 v[12:27], v[36:39], v[28:35], v10
-; SDAG-NEXT:    s_nop 11
-; SDAG-NEXT:    v_mov_b32_e32 v0, v12
-; SDAG-NEXT:    v_mov_b32_e32 v1, v13
-; SDAG-NEXT:    v_mov_b32_e32 v2, v14
-; SDAG-NEXT:    v_mov_b32_e32 v3, v15
-; SDAG-NEXT:    v_mov_b32_e32 v4, v16
-; SDAG-NEXT:    v_mov_b32_e32 v5, v17
-; SDAG-NEXT:    v_mov_b32_e32 v6, v18
-; SDAG-NEXT:    v_mov_b32_e32 v7, v19
-; SDAG-NEXT:    v_mov_b32_e32 v8, v20
-; SDAG-NEXT:    v_mov_b32_e32 v9, v21
-; SDAG-NEXT:    v_mov_b32_e32 v10, v22
-; SDAG-NEXT:    v_mov_b32_e32 v11, v23
-; SDAG-NEXT:    v_mov_b32_e32 v12, v24
-; SDAG-NEXT:    v_mov_b32_e32 v13, v25
-; SDAG-NEXT:    v_mov_b32_e32 v14, v26
-; SDAG-NEXT:    v_mov_b32_e32 v15, v27
+; SDAG-NEXT:    v_smfmac_i32_32x32x64_i8 v[0:15], v[26:29], v[18:25], v16
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: test_smfmac_i32_32x32x64_i8__sgpr:
@@ -1489,30 +1426,25 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_bf8__sgpr(<4 x i32> inreg %arg
 ; SDAG-LABEL: test_smfmac_f32_16x16x128_bf8_bf8__sgpr:
 ; SDAG:       ; %bb.0:
 ; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b32_e32 v10, s0
-; SDAG-NEXT:    v_mov_b32_e32 v11, s1
-; SDAG-NEXT:    v_mov_b32_e32 v12, s2
-; SDAG-NEXT:    v_mov_b32_e32 v13, s3
-; SDAG-NEXT:    v_mov_b32_e32 v2, s16
-; SDAG-NEXT:    v_mov_b32_e32 v3, s17
-; SDAG-NEXT:    v_mov_b32_e32 v4, s18
-; SDAG-NEXT:    v_mov_b32_e32 v5, s19
-; SDAG-NEXT:    v_mov_b32_e32 v6, s20
-; SDAG-NEXT:    v_mov_b32_e32 v7, s21
-; SDAG-NEXT:    v_mov_b32_e32 v8, s22
-; SDAG-NEXT:    v_mov_b32_e32 v9, s23
-; SDAG-NEXT:    v_accvgpr_write_b32 a0, s24
-; SDAG-NEXT:    v_accvgpr_write_b32 a1, s25
-; SDAG-NEXT:    v_accvgpr_write_b32 a2, s26
-; SDAG-NEXT:    v_accvgpr_write_b32 a3, s27
-; SDAG-NEXT:    v_mov_b32_e32 v0, s28
+; SDAG-NEXT:    v_mov_b32_e32 v14, s0
+; SDAG-NEXT:    v_mov_b32_e32 v15, s1
+; SDAG-NEXT:    v_mov_b32_e32 v16, s2
+; SDAG-NEXT:    v_mov_b32_e32 v17, s3
+; SDAG-NEXT:    v_mov_b32_e32 v6, s16
+; SDAG-NEXT:    v_mov_b32_e32 v7, s17
+; SDAG-NEXT:    v_mov_b32_e32 v8, s18
+; SDAG-NEXT:    v_mov_b32_e32 v9, s19
+; SDAG-NEXT:    v_mov_b32_e32 v10, s20
+; SDAG-NEXT:    v_mov_b32_e32 v11, s21
+; SDAG-NEXT:    v_mov_b32_e32 v12, s22
+; SDAG-NEXT:    v_mov_b32_e32 v13, s23
+; SDAG-NEXT:    v_mov_b32_e32 v0, s24
+; SDAG-NEXT:    v_mov_b32_e32 v1, s25
+; SDAG-NEXT:    v_mov_b32_e32 v2, s26
+; SDAG-NEXT:    v_mov_b32_e32 v3, s27
+; SDAG-NEXT:    v_mov_b32_e32 v4, s28
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_smfmac_f32_16x16x128_bf8_bf8 a[0:3], v[10:13], v[2:9], v0
-; SDAG-NEXT:    s_nop 7
-; SDAG-NEXT:    v_accvgpr_read_b32 v0, a0
-; SDAG-NEXT:    v_accvgpr_read_b32 v1, a1
-; SDAG-NEXT:    v_accvgpr_read_b32 v2, a2
-; SDAG-NEXT:    v_accvgpr_read_b32 v3, a3
+; SDAG-NEXT:    v_smfmac_f32_16x16x128_bf8_bf8 v[0:3], v[14:17], v[6:13], v4
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: test_smfmac_f32_16x16x128_bf8_bf8__sgpr:
@@ -1658,30 +1590,25 @@ define <4 x float> @test_smfmac_f32_16x16x128_bf8_fp8__sgpr(<4 x i32> inreg %arg
 ; SDAG-LABEL: test_smfmac_f32_16x16x128_bf8_fp8__sgpr:
 ; SDAG:       ; %bb.0:
 ; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b32_e32 v10, s0
-; SDAG-NEXT:    v_mov_b32_e32 v11, s1
-; SDAG-NEXT:    v_mov_b32_e32 v12, s2
-; SDAG-NEXT:    v_mov_b32_e32 v13, s3
-; SDAG-NEXT:    v_mov_b32_e32 v2, s16
-; SDAG-NEXT:    v_mov_b32_e32 v3, s17
-; SDAG-NEXT:    v_mov_b32_e32 v4, s18
-; SDAG-NEXT:    v_mov_b32_e32 v5, s19
-; SDAG-NEXT:    v_mov_b32_e32 v6, s20
-; SDAG-NEXT:    v_mov_b32_e32 v7, s21
-; SDAG-NEXT:    v_mov_b32_e32 v8, s22
-; SDAG-NEXT:    v_mov_b32_e32 v9, s23
-; SDAG-NEXT:    v_accvgpr_write_b32 a0, s24
-; SDAG-NEXT:    v_accvgpr_write_b32 a1, s25
-; SDAG-NEXT:    v_accvgpr_write_b32 a2, s26
-; SDAG-NEXT:    v_accvgpr_write_b32 a3, s27
-; SDAG-NEXT:    v_mov_b32_e32 v0, s28
+; SDAG-NEXT:    v_mov_b32_e32 v14, s0
+; SDAG-NEXT:    v_mov_b32_e32 v15, s1
+; SDAG-NEXT:    v_mov_b32_e32 v16, s2
+; SDAG-NEXT:    v_mov_b32_e32 v17, s3
+; SDAG-NEXT:    v_mov_b32_e32 v6, s16
+; SDAG-NEXT:    v_mov_b32_e32 v7, s17
+; SDAG-NEXT:    v_mov_b32_e32 v8, s18
+; SDAG-NEXT:    v_mov_b32_e32 v9, s19
+; SDAG-NEXT:    v_mov_b32_e32 v10, s20
+; SDAG-NEXT:    v_mov_b32_e32 v11, s21
+; SDAG-NEXT:    v_mov_b32_e32 v12, s22
+; SDAG-NEXT:    v_mov_b32_e32 v13, s23
+; SDAG-NEXT:    v_mov_b32_e32 v0, s24
+; SDAG-NEXT:    v_mov_b32_e32 v1, s25
+; SDAG-NEXT:    v_mov_b32_e32 v2, s26
+; SDAG-NEXT:    v_mov_b32_e32 v3, s27
+; SDAG-NEXT:    v_mov_b32_e32 v4, s28
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_smfmac_f32_16x16x128_bf8_fp8 a[0:3], v[10:13], v[2:9], v0
-; SDAG-NEXT:    s_nop 7
-; SDAG-NEXT:    v_accvgpr_read_b32 v0, a0
-; SDAG-NEXT:    v_accvgpr_read_b32 v1, a1
-; SDAG-NEXT:    v_accvgpr_read_b32 v2, a2
-; SDAG-NEXT:    v_accvgpr_read_b32 v3, a3
+; SDAG-NEXT:    v_smfmac_f32_16x16x128_bf8_fp8 v[0:3], v[14:17], v[6:13], v4
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: test_smfmac_f32_16x16x128_bf8_fp8__sgpr:
@@ -1827,30 +1754,25 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_bf8__sgpr(<4 x i32> inreg %arg
 ; SDAG-LABEL: test_smfmac_f32_16x16x128_fp8_bf8__sgpr:
 ; SDAG:       ; %bb.0:
 ; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b32_e32 v10, s0
-; SDAG-NEXT:    v_mov_b32_e32 v11, s1
-; SDAG-NEXT:    v_mov_b32_e32 v12, s2
-; SDAG-NEXT:    v_mov_b32_e32 v13, s3
-; SDAG-NEXT:    v_mov_b32_e32 v2, s16
-; SDAG-NEXT:    v_mov_b32_e32 v3, s17
-; SDAG-NEXT:    v_mov_b32_e32 v4, s18
-; SDAG-NEXT:    v_mov_b32_e32 v5, s19
-; SDAG-NEXT:    v_mov_b32_e32 v6, s20
-; SDAG-NEXT:    v_mov_b32_e32 v7, s21
-; SDAG-NEXT:    v_mov_b32_e32 v8, s22
-; SDAG-NEXT:    v_mov_b32_e32 v9, s23
-; SDAG-NEXT:    v_accvgpr_write_b32 a0, s24
-; SDAG-NEXT:    v_accvgpr_write_b32 a1, s25
-; SDAG-NEXT:    v_accvgpr_write_b32 a2, s26
-; SDAG-NEXT:    v_accvgpr_write_b32 a3, s27
-; SDAG-NEXT:    v_mov_b32_e32 v0, s28
+; SDAG-NEXT:    v_mov_b32_e32 v14, s0
+; SDAG-NEXT:    v_mov_b32_e32 v15, s1
+; SDAG-NEXT:    v_mov_b32_e32 v16, s2
+; SDAG-NEXT:    v_mov_b32_e32 v17, s3
+; SDAG-NEXT:    v_mov_b32_e32 v6, s16
+; SDAG-NEXT:    v_mov_b32_e32 v7, s17
+; SDAG-NEXT:    v_mov_b32_e32 v8, s18
+; SDAG-NEXT:    v_mov_b32_e32 v9, s19
+; SDAG-NEXT:    v_mov_b32_e32 v10, s20
+; SDAG-NEXT:    v_mov_b32_e32 v11, s21
+; SDAG-NEXT:    v_mov_b32_e32 v12, s22
+; SDAG-NEXT:    v_mov_b32_e32 v13, s23
+; SDAG-NEXT:    v_mov_b32_e32 v0, s24
+; SDAG-NEXT:    v_mov_b32_e32 v1, s25
+; SDAG-NEXT:    v_mov_b32_e32 v2, s26
+; SDAG-NEXT:    v_mov_b32_e32 v3, s27
+; SDAG-NEXT:    v_mov_b32_e32 v4, s28
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_smfmac_f32_16x16x128_fp8_bf8 a[0:3], v[10:13], v[2:9], v0
-; SDAG-NEXT:    s_nop 7
-; SDAG-NEXT:    v_accvgpr_read_b32 v0, a0
-; SDAG-NEXT:    v_accvgpr_read_b32 v1, a1
-; SDAG-NEXT:    v_accvgpr_read_b32 v2, a2
-; SDAG-NEXT:    v_accvgpr_read_b32 v3, a3
+; SDAG-NEXT:    v_smfmac_f32_16x16x128_fp8_bf8 v[0:3], v[14:17], v[6:13], v4
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: test_smfmac_f32_16x16x128_fp8_bf8__sgpr:
@@ -1996,30 +1918,25 @@ define <4 x float> @test_smfmac_f32_16x16x128_fp8_fp8__sgpr(<4 x i32> inreg %arg
 ; SDAG-LABEL: test_smfmac_f32_16x16x128_fp8_fp8__sgpr:
 ; SDAG:       ; %bb.0:
 ; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b32_e32 v10, s0
-; SDAG-NEXT:    v_mov_b32_e32 v11, s1
-; SDAG-NEXT:    v_mov_b32_e32 v12, s2
-; SDAG-NEXT:    v_mov_b32_e32 v13, s3
-; SDAG-NEXT:    v_mov_b32_e32 v2, s16
-; SDAG-NEXT:    v_mov_b32_e32 v3, s17
-; SDAG-NEXT:    v_mov_b32_e32 v4, s18
-; SDAG-NEXT:    v_mov_b32_e32 v5, s19
-; SDAG-NEXT:    v_mov_b32_e32 v6, s20
-; SDAG-NEXT:    v_mov_b32_e32 v7, s21
-; SDAG-NEXT:    v_mov_b32_e32 v8, s22
-; SDAG-NEXT:    v_mov_b32_e32 v9, s23
-; SDAG-NEXT:    v_accvgpr_write_b32 a0, s24
-; SDAG-NEXT:    v_accvgpr_write_b32 a1, s25
-; SDAG-NEXT:    v_accvgpr_write_b32 a2, s26
-; SDAG-NEXT:    v_accvgpr_write_b32 a3, s27
-; SDAG-NEXT:    v_mov_b32_e32 v0, s28
+; SDAG-NEXT:    v_mov_b32_e32 v14, s0
+; SDAG-NEXT:    v_mov_b32_e32 v15, s1
+; SDAG-NEXT:    v_mov_b32_e32 v16, s2
+; SDAG-NEXT:    v_mov_b32_e32 v17, s3
+; SDAG-NEXT:    v_mov_b32_e32 v6, s16
+; SDAG-NEXT:    v_mov_b32_e32 v7, s17
+; SDAG-NEXT:    v_mov_b32_e32 v8, s18
+; SDAG-NEXT:    v_mov_b32_e32 v9, s19
+; SDAG-NEXT:    v_mov_b32_e32 v10, s20
+; SDAG-NEXT:    v_mov_b32_e32 v11, s21
+; SDAG-NEXT:    v_mov_b32_e32 v12, s22
+; SDAG-NEXT:    v_mov_b32_e32 v13, s23
+; SDAG-NEXT:    v_mov_b32_e32 v0, s24
+; SDAG-NEXT:    v_mov_b32_e32 v1, s25
+; SDAG-NEXT:    v_mov_b32_e32 v2, s26
+; SDAG-NEXT:    v_mov_b32_e32 v3, s27
+; SDAG-NEXT:    v_mov_b32_e32 v4, s28
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_smfmac_f32_16x16x128_fp8_fp8 a[0:3], v[10:13], v[2:9], v0
-; SDAG-NEXT:    s_nop 7
-; SDAG-NEXT:    v_accvgpr_read_b32 v0, a0
-; SDAG-NEXT:    v_accvgpr_read_b32 v1, a1
-; SDAG-NEXT:    v_accvgpr_read_b32 v2, a2
-; SDAG-NEXT:    v_accvgpr_read_b32 v3, a3
+; SDAG-NEXT:    v_smfmac_f32_16x16x128_fp8_fp8 v[0:3], v[14:17], v[6:13], v4
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: test_smfmac_f32_16x16x128_fp8_fp8__sgpr:
@@ -2318,53 +2235,37 @@ define <16 x float> @test_smfmac_f32_32x32x64_bf8_bf8__sgpr(<4 x i32> inreg %arg
 ; SDAG-LABEL: test_smfmac_f32_32x32x64_bf8_bf8__sgpr:
 ; SDAG:       ; %bb.0:
 ; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b32_e32 v36, s0
-; SDAG-NEXT:    v_mov_b32_e32 v37, s1
-; SDAG-NEXT:    v_mov_b32_e32 v38, s2
-; SDAG-NEXT:    v_mov_b32_e32 v39, s3
-; SDAG-NEXT:    v_mov_b32_e32 v13, s25
-; SDAG-NEXT:    v_mov_b32_e32 v14, s26
-; SDAG-NEXT:    v_mov_b32_e32 v15, s27
-; SDAG-NEXT:    v_mov_b32_e32 v16, s28
-; SDAG-NEXT:    v_mov_b32_e32 v17, s29
-; SDAG-NEXT:    v_mov_b32_e32 v28, s16
-; SDAG-NEXT:    v_mov_b32_e32 v29, s17
-; SDAG-NEXT:    v_mov_b32_e32 v30, s18
-; SDAG-NEXT:    v_mov_b32_e32 v31, s19
-; SDAG-NEXT:    v_mov_b32_e32 v32, s20
-; SDAG-NEXT:    v_mov_b32_e32 v33, s21
-; SDAG-NEXT:    v_mov_b32_e32 v34, s22
-; SDAG-NEXT:    v_mov_b32_e32 v35, s23
-; SDAG-NEXT:    v_mov_b32_e32 v12, s24
-; SDAG-NEXT:    v_mov_b32_e32 v18, v0
-; SDAG-NEXT:    v_mov_b32_e32 v19, v1
-; SDAG-NEXT:    v_mov_b32_e32 v20, v2
-; SDAG-NEXT:    v_mov_b32_e32 v21, v3
-; SDAG-NEXT:    v_mov_b32_e32 v22, v4
-; SDAG-NEXT:    v_mov_b32_e32 v23, v5
-; SDAG-NEXT:    v_mov_b32_e32 v24, v6
-; SDAG-NEXT:    v_mov_b32_e32 v25, v7
-; SDAG-NEXT:    v_mov_b32_e32 v26, v8
-; SDAG-NEXT:    v_mov_b32_e32 v27, v9
+; SDAG-NEXT:    v_mov_b32_e32 v26, s0
+; SDAG-NEXT:    v_mov_b32_e32 v27, s1
+; SDAG-NEXT:    v_mov_b32_e32 v28, s2
+; SDAG-NEXT:    v_mov_b32_e32 v29, s3
+; SDAG-NEXT:    v_mov_b32_e32 v16, v10
+; SDAG-NEXT:    v_mov_b32_e32 v15, v9
+; SDAG-NEXT:    v_mov_b32_e32 v14, v8
+; SDAG-NEXT:    v_mov_b32_e32 v13, v7
+; SDAG-NEXT:    v_mov_b32_e32 v12, v6
+; SDAG-NEXT:    v_mov_b32_e32 v11, v5
+; SDAG-NEXT:    v_mov_b32_e32 v10, v4
+; SDAG-NEXT:    v_mov_b32_e32 v9, v3
+; SDAG-NEXT:    v_mov_b32_e32 v8, v2
+; SDAG-NEXT:    v_mov_b32_e32 v7, v1
+; SDAG-NEXT:    v_mov_b32_e32 v6, v0
+; SDAG-NEXT:    v_mov_b32_e32 v0, s24
+; SDAG-NEXT:    v_mov_b32_e32 v1, s25
+; SDAG-NEXT:    v_mov_b32_e32 v2, s26
+; SDAG-NEXT:    v_mov_b32_e32 v3, s27
+; SDAG-NEXT:    v_mov_b32_e32 v4, s28
+; SDAG-NEXT:    v_mov_b32_e32 v5, s29
+; SDAG-NEXT:    v_mov_b32_e32 v18, s16
+; SDAG-NEXT:    v_mov_b32_e32 v19, s17
+; SDAG-NEXT:    v_mov_b32_e32 v20, s18
+; SDAG-NEXT:    v_mov_b32_e32 v21, s19
+; SDAG-NEXT:    v_mov_b32_e32 v22, s20
+; SDAG-NEXT:    v_mov_b32_e32 v23, s21
+; SDAG-NEXT:    v_mov_b32_e32 v24, s22
+; SDAG-NEXT:    v_mov_b32_e32 v25, s23
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_smfmac_f32_32x32x64_bf8_bf8 v[12:27], v[36:39], v[28:35], v10
-; SDAG-NEXT:    s_nop 11
-; SDAG-NEXT:    v_mov_b32_e32 v0, v12
-; SDAG-NEXT:    v_mov_b32_e32 v1, v13
-; SDAG-NEXT:    v_mov_b32_e32 v2, v14
-; SDAG-NEXT:    v_mov_b32_e32 v3, v15
-; SDAG-NEXT:    v_mov_b32_e32 v4, v16
-; SDAG-NEXT:    v_mov_b32_e32 v5, v17
-; SDAG-NEXT:    v_mov_b32_e32 v6, v18
-; SDAG-NEXT:    v_mov_b32_e32 v7, v19
-; SDAG-NEXT:    v_mov_b32_e32 v8, v20
-; SDAG-NEXT:    v_mov_b32_e32 v9, v21
-; SDAG-NEXT:    v_mov_b32_e32 v10, v22
-; SDAG-NEXT:    v_mov_b32_e32 v11, v23
-; SDAG-NEXT:    v_mov_b32_e32 v12, v24
-; SDAG-NEXT:    v_mov_b32_e32 v13, v25
-; SDAG-NEXT:    v_mov_b32_e32 v14, v26
-; SDAG-NEXT:    v_mov_b32_e32 v15, v27
+; SDAG-NEXT:    v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[26:29], v[18:25], v16
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: test_smfmac_f32_32x32x64_bf8_bf8__sgpr:
@@ -2685,53 +2586,37 @@ define <16 x float> @test_smfmac_f32_32x32x64_bf8_fp8__sgpr(<4 x i32> inreg %arg
 ; SDAG-LABEL: test_smfmac_f32_32x32x64_bf8_fp8__sgpr:
 ; SDAG:       ; %bb.0:
 ; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b32_e32 v36, s0
-; SDAG-NEXT:    v_mov_b32_e32 v37, s1
-; SDAG-NEXT:    v_mov_b32_e32 v38, s2
-; SDAG-NEXT:    v_mov_b32_e32 v39, s3
-; SDAG-NEXT:    v_mov_b32_e32 v13, s25
-; SDAG-NEXT:    v_mov_b32_e32 v14, s26
-; SDAG-NEXT:    v_mov_b32_e32 v15, s27
-; SDAG-NEXT:    v_mov_b32_e32 v16, s28
-; SDAG-NEXT:    v_mov_b32_e32 v17, s29
-; SDAG-NEXT:    v_mov_b32_e32 v28, s16
-; SDAG-NEXT:    v_mov_b32_e32 v29, s17
-; SDAG-NEXT:    v_mov_b32_e32 v30, s18
-; SDAG-NEXT:    v_mov_b32_e32 v31, s19
-; SDAG-NEXT:    v_mov_b32_e32 v32, s20
-; SDAG-NEXT:    v_mov_b32_e32 v33, s21
-; SDAG-NEXT:    v_mov_b32_e32 v34, s22
-; SDAG-NEXT:    v_mov_b32_e32 v35, s23
-; SDAG-NEXT:    v_mov_b32_e32 v12, s24
-; SDAG-NEXT:    v_mov_b32_e32 v18, v0
-; SDAG-NEXT:    v_mov_b32_e32 v19, v1
-; SDAG-NEXT:    v_mov_b32_e32 v20, v2
-; SDAG-NEXT:    v_mov_b32_e32 v21, v3
-; SDAG-NEXT:    v_mov_b32_e32 v22, v4
-; SDAG-NEXT:    v_mov_b32_e32 v23, v5
-; SDAG-NEXT:    v_mov_b32_e32 v24, v6
-; SDAG-NEXT:    v_mov_b32_e32 v25, v7
-; SDAG-NEXT:    v_mov_b32_e32 v26, v8
-; SDAG-NEXT:    v_mov_b32_e32 v27, v9
+; SDAG-NEXT:    v_mov_b32_e32 v26, s0
+; SDAG-NEXT:    v_mov_b32_e32 v27, s1
+; SDAG-NEXT:    v_mov_b32_e32 v28, s2
+; SDAG-NEXT:    v_mov_b32_e32 v29, s3
+; SDAG-NEXT:    v_mov_b32_e32 v16, v10
+; SDAG-NEXT:    v_mov_b32_e32 v15, v9
+; SDAG-NEXT:    v_mov_b32_e32 v14, v8
+; SDAG-NEXT:    v_mov_b32_e32 v13, v7
+; SDAG-NEXT:    v_mov_b32_e32 v12, v6
+; SDAG-NEXT:    v_mov_b32_e32 v11, v5
+; SDAG-NEXT:    v_mov_b32_e32 v10, v4
+; SDAG-NEXT:    v_mov_b32_e32 v9, v3
+; SDAG-NEXT:    v_mov_b32_e32 v8, v2
+; SDAG-NEXT:    v_mov_b32_e32 v7, v1
+; SDAG-NEXT:    v_mov_b32_e32 v6, v0
+; SDAG-NEXT:    v_mov_b32_e32 v0, s24
+; SDAG-NEXT:    v_mov_b32_e32 v1, s25
+; SDAG-NEXT:    v_mov_b32_e32 v2, s26
+; SDAG-NEXT:    v_mov_b32_e32 v3, s27
+; SDAG-NEXT:    v_mov_b32_e32 v4, s28
+; SDAG-NEXT:    v_mov_b32_e32 v5, s29
+; SDAG-NEXT:    v_mov_b32_e32 v18, s16
+; SDAG-NEXT:    v_mov_b32_e32 v19, s17
+; SDAG-NEXT:    v_mov_b32_e32 v20, s18
+; SDAG-NEXT:    v_mov_b32_e32 v21, s19
+; SDAG-NEXT:    v_mov_b32_e32 v22, s20
+; SDAG-NEXT:    v_mov_b32_e32 v23, s21
+; SDAG-NEXT:    v_mov_b32_e32 v24, s22
+; SDAG-NEXT:    v_mov_b32_e32 v25, s23
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_smfmac_f32_32x32x64_bf8_fp8 v[12:27], v[36:39], v[28:35], v10
-; SDAG-NEXT:    s_nop 11
-; SDAG-NEXT:    v_mov_b32_e32 v0, v12
-; SDAG-NEXT:    v_mov_b32_e32 v1, v13
-; SDAG-NEXT:    v_mov_b32_e32 v2, v14
-; SDAG-NEXT:    v_mov_b32_e32 v3, v15
-; SDAG-NEXT:    v_mov_b32_e32 v4, v16
-; SDAG-NEXT:    v_mov_b32_e32 v5, v17
-; SDAG-NEXT:    v_mov_b32_e32 v6, v18
-; SDAG-NEXT:    v_mov_b32_e32 v7, v19
-; SDAG-NEXT:    v_mov_b32_e32 v8, v20
-; SDAG-NEXT:    v_mov_b32_e32 v9, v21
-; SDAG-NEXT:    v_mov_b32_e32 v10, v22
-; SDAG-NEXT:    v_mov_b32_e32 v11, v23
-; SDAG-NEXT:    v_mov_b32_e32 v12, v24
-; SDAG-NEXT:    v_mov_b32_e32 v13, v25
-; SDAG-NEXT:    v_mov_b32_e32 v14, v26
-; SDAG-NEXT:    v_mov_b32_e32 v15, v27
+; SDAG-NEXT:    v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[26:29], v[18:25], v16
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: test_smfmac_f32_32x32x64_bf8_fp8__sgpr:
@@ -3052,53 +2937,37 @@ define <16 x float> @test_smfmac_f32_32x32x64_fp8_bf8__sgpr(<4 x i32> inreg %arg
 ; SDAG-LABEL: test_smfmac_f32_32x32x64_fp8_bf8__sgpr:
 ; SDAG:       ; %bb.0:
 ; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b32_e32 v36, s0
-; SDAG-NEXT:    v_mov_b32_e32 v37, s1
-; SDAG-NEXT:    v_mov_b32_e32 v38, s2
-; SDAG-NEXT:    v_mov_b32_e32 v39, s3
-; SDAG-NEXT:    v_mov_b32_e32 v13, s25
-; SDAG-NEXT:    v_mov_b32_e32 v14, s26
-; SDAG-NEXT:    v_mov_b32_e32 v15, s27
-; SDAG-NEXT:    v_mov_b32_e32 v16, s28
-; SDAG-NEXT:    v_mov_b32_e32 v17, s29
-; SDAG-NEXT:    v_mov_b32_e32 v28, s16
-; SDAG-NEXT:    v_mov_b32_e32 v29, s17
-; SDAG-NEXT:    v_mov_b32_e32 v30, s18
-; SDAG-NEXT:    v_mov_b32_e32 v31, s19
-; SDAG-NEXT:    v_mov_b32_e32 v32, s20
-; SDAG-NEXT:    v_mov_b32_e32 v33, s21
-; SDAG-NEXT:    v_mov_b32_e32 v34, s22
-; SDAG-NEXT:    v_mov_b32_e32 v35, s23
-; SDAG-NEXT:    v_mov_b32_e32 v12, s24
-; SDAG-NEXT:    v_mov_b32_e32 v18, v0
-; SDAG-NEXT:    v_mov_b32_e32 v19, v1
-; SDAG-NEXT:    v_mov_b32_e32 v20, v2
-; SDAG-NEXT:    v_mov_b32_e32 v21, v3
-; SDAG-NEXT:    v_mov_b32_e32 v22, v4
-; SDAG-NEXT:    v_mov_b32_e32 v23, v5
-; SDAG-NEXT:    v_mov_b32_e32 v24, v6
-; SDAG-NEXT:    v_mov_b32_e32 v25, v7
-; SDAG-NEXT:    v_mov_b32_e32 v26, v8
-; SDAG-NEXT:    v_mov_b32_e32 v27, v9
+; SDAG-NEXT:    v_mov_b32_e32 v26, s0
+; SDAG-NEXT:    v_mov_b32_e32 v27, s1
+; SDAG-NEXT:    v_mov_b32_e32 v28, s2
+; SDAG-NEXT:    v_mov_b32_e32 v29, s3
+; SDAG-NEXT:    v_mov_b32_e32 v16, v10
+; SDAG-NEXT:    v_mov_b32_e32 v15, v9
+; SDAG-NEXT:    v_mov_b32_e32 v14, v8
+; SDAG-NEXT:    v_mov_b32_e32 v13, v7
+; SDAG-NEXT:    v_mov_b32_e32 v12, v6
+; SDAG-NEXT:    v_mov_b32_e32 v11, v5
+; SDAG-NEXT:    v_mov_b32_e32 v10, v4
+; SDAG-NEXT:    v_mov_b32_e32 v9, v3
+; SDAG-NEXT:    v_mov_b32_e32 v8, v2
+; SDAG-NEXT:    v_mov_b32_e32 v7, v1
+; SDAG-NEXT:    v_mov_b32_e32 v6, v0
+; SDAG-NEXT:    v_mov_b32_e32 v0, s24
+; SDAG-NEXT:    v_mov_b32_e32 v1, s25
+; SDAG-NEXT:    v_mov_b32_e32 v2, s26
+; SDAG-NEXT:    v_mov_b32_e32 v3, s27
+; SDAG-NEXT:    v_mov_b32_e32 v4, s28
+; SDAG-NEXT:    v_mov_b32_e32 v5, s29
+; SDAG-NEXT:    v_mov_b32_e32 v18, s16
+; SDAG-NEXT:    v_mov_b32_e32 v19, s17
+; SDAG-NEXT:    v_mov_b32_e32 v20, s18
+; SDAG-NEXT:    v_mov_b32_e32 v21, s19
+; SDAG-NEXT:    v_mov_b32_e32 v22, s20
+; SDAG-NEXT:    v_mov_b32_e32 v23, s21
+; SDAG-NEXT:    v_mov_b32_e32 v24, s22
+; SDAG-NEXT:    v_mov_b32_e32 v25, s23
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_smfmac_f32_32x32x64_fp8_bf8 v[12:27], v[36:39], v[28:35], v10
-; SDAG-NEXT:    s_nop 11
-; SDAG-NEXT:    v_mov_b32_e32 v0, v12
-; SDAG-NEXT:    v_mov_b32_e32 v1, v13
-; SDAG-NEXT:    v_mov_b32_e32 v2, v14
-; SDAG-NEXT:    v_mov_b32_e32 v3, v15
-; SDAG-NEXT:    v_mov_b32_e32 v4, v16
-; SDAG-NEXT:    v_mov_b32_e32 v5, v17
-; SDAG-NEXT:    v_mov_b32_e32 v6, v18
-; SDAG-NEXT:    v_mov_b32_e32 v7, v19
-; SDAG-NEXT:    v_mov_b32_e32 v8, v20
-; SDAG-NEXT:    v_mov_b32_e32 v9, v21
-; SDAG-NEXT:    v_mov_b32_e32 v10, v22
-; SDAG-NEXT:    v_mov_b32_e32 v11, v23
-; SDAG-NEXT:    v_mov_b32_e32 v12, v24
-; SDAG-NEXT:    v_mov_b32_e32 v13, v25
-; SDAG-NEXT:    v_mov_b32_e32 v14, v26
-; SDAG-NEXT:    v_mov_b32_e32 v15, v27
+; SDAG-NEXT:    v_smfmac_f32_32x32x64_fp8_bf8 v[0:15], v[26:29], v[18:25], v16
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: test_smfmac_f32_32x32x64_fp8_bf8__sgpr:
@@ -3419,53 +3288,37 @@ define <16 x float> @test_smfmac_f32_32x32x64_fp8_fp8__sgpr(<4 x i32> inreg %arg
 ; SDAG-LABEL: test_smfmac_f32_32x32x64_fp8_fp8__sgpr:
 ; SDAG:       ; %bb.0:
 ; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT:    v_mov_b32_e32 v36, s0
-; SDAG-NEXT:    v_mov_b32_e32 v37, s1
-; SDAG-NEXT:    v_mov_b32_e32 v38, s2
-; SDAG-NEXT:    v_mov_b32_e32 v39, s3
-; SDAG-NEXT:    v_mov_b32_e32 v13, s25
-; SDAG-NEXT:    v_mov_b32_e32 v14, s26
-; SDAG-NEXT:    v_mov_b32_e32 v15, s27
-; SDAG-NEXT:    v_mov_b32_e32 v16, s28
-; SDAG-NEXT:    v_mov_b32_e32 v17, s29
-; SDAG-NEXT:    v_mov_b32_e32 v28, s16
-; SDAG-NEXT:    v_mov_b32_e32 v29, s17
-; SDAG-NEXT:    v_mov_b32_e32 v30, s18
-; SDAG-NEXT:    v_mov_b32_e32 v31, s19
-; SDAG-NEXT:    v_mov_b32_e32 v32, s20
-; SDAG-NEXT:    v_mov_b32_e32 v33, s21
-; SDAG-NEXT:    v_mov_b32_e32 v34, s22
-; SDAG-NEXT:    v_mov_b32_e32 v35, s23
-; SDAG-NEXT:    v_mov_b32_e32 v12, s24
-; SDAG-NEXT:    v_mov_b32_e32 v18, v0
-; SDAG-NEXT:    v_mov_b32_e32 v19, v1
-; SDAG-NEXT:    v_mov_b32_e32 v20, v2
-; SDAG-NEXT:    v_mov_b32_e32 v21, v3
-; SDAG-NEXT:    v_mov_b32_e32 v22, v4
-; SDAG-NEXT:    v_mov_b32_e32 v23, v5
-; SDAG-NEXT:    v_mov_b32_e32 v24, v6
-; SDAG-NEXT:    v_mov_b32_e32 v25, v7
-; SDAG-NEXT:    v_mov_b32_e32 v26, v8
-; SDAG-NEXT:    v_mov_b32_e32 v27, v9
+; SDAG-NEXT:    v_mov_b32_e32 v26, s0
+; SDAG-NEXT:    v_mov_b32_e32 v27, s1
+; SDAG-NEXT:    v_mov_b32_e32 v28, s2
+; SDAG-NEXT:    v_mov_b32_e32 v29, s3
+; SDAG-NEXT:    v_mov_b32_e32 v16, v10
+; SDAG-NEXT:    v_mov_b32_e32 v15, v9
+; SDAG-NEXT:    v_mov_b32_e32 v14, v8
+; SDAG-NEXT:    v_mov_b32_e32 v13, v7
+; SDAG-NEXT:    v_mov_b32_e32 v12, v6
+; SDAG-NEXT:    v_mov_b32_e32 v11, v5
+; SDAG-NEXT:    v_mov_b32_e32 v10, v4
+; SDAG-NEXT:    v_mov_b32_e32 v9, v3
+; SDAG-NEXT:    v_mov_b32_e32 v8, v2
+; SDAG-NEXT:    v_mov_b32_e32 v7, v1
+; SDAG-NEXT:    v_mov_b32_e32 v6, v0
+; SDAG-NEXT:    v_mov_b32_e32 v0, s24
+; SDAG-NEXT:    v_mov_b32_e32 v1, s25
+; SDAG-NEXT:    v_mov_b32_e32 v2, s26
+; SDAG-NEXT:    v_mov_b32_e32 v3, s27
+; SDAG-NEXT:    v_mov_b32_e32 v4, s28
+; SDAG-NEXT:    v_mov_b32_e32 v5, s29
+; SDAG-NEXT:    v_mov_b32_e32 v18, s16
+; SDAG-NEXT:    v_mov_b32_e32 v19, s17
+; SDAG-NEXT:    v_mov_b32_e32 v20, s18
+; SDAG-NEXT:    v_mov_b32_e32 v21, s19
+; SDAG-NEXT:    v_mov_b32_e32 v22, s20
+; SDAG-NEXT:    v_mov_b32_e32 v23, s21
+; SDAG-NEXT:    v_mov_b32_e32 v24, s22
+; SDAG-NEXT:    v_mov_b32_e32 v25, s23
 ; SDAG-NEXT:    s_nop 1
-; SDAG-NEXT:    v_smfmac_f32_32x32x64_fp8_fp8 v[12:27], v[36:39], v[28:35], v10
-; SDAG-NEXT:    s_nop 11
-; SDAG-NEXT:    v_mov_b32_e32 v0, v12
-; SDAG-NEXT:    v_mov_b32_e32 v1, v13
-; SDAG-NEXT:    v_mov_b32_e32 v2, v14
-; SDAG-NEXT:    v_mov_b32_e32 v3, v15
-; SDAG-NEXT:    v_mov_b32_e32 v4, v16
-; SDAG-NEXT:    v_mov_b32_e32 v5, v17
-; SDAG-NEXT:    v_mov_b32_e32 v6, v18
-; SDAG-NEXT:    v_mov_b32_e32 v7, v19
-; SDAG-NEXT:    v_mov_b32_e32 v8, v20
-; SDAG-NEXT:    v_mov_b32_e32 v9, v21
-; SDAG-NEXT:    v_mov_b32_e32 v10, v22
-; SDAG-NEXT:    v_mov_b32_e32 v11, v23
-; SDAG-NEXT:    v_mov_b32_e32 v12, v24
-; SDAG-NEXT:    v_mov_b32_e32 v13, v25
-; SDAG-NEXT:    v_mov_b32_e32 v14, v26
-; SDAG-NEXT:    v_mov_b32_e32 v15, v27
+; SDAG-NEXT:    v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[26:29], v[18:25], v16
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-LABEL: test_smfmac_f32_32x32x64_fp8_fp8__sgpr:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll
index dd89f80..ba769ef 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll
@@ -100,7 +100,7 @@ define amdgpu_kernel void @tanh_f16(ptr addrspace(1) %out, half %src) #1 {
 ; SDAG-REAL16-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-REAL16-NEXT:    s_wait_kmcnt 0x0
 ; SDAG-REAL16-NEXT:    v_tanh_f16_e32 v0.l, s2
-; SDAG-REAL16-NEXT:    flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT:    global_store_b16 v1, v0, s[0:1]
 ; SDAG-REAL16-NEXT:    s_endpgm
 ;
 ; SDAG-FAKE16-LABEL: tanh_f16:
@@ -123,7 +123,7 @@ define amdgpu_kernel void @tanh_f16_constant_4.0(ptr addrspace(1) %out) #1 {
 ; SDAG-REAL16-NEXT:    v_tanh_f16_e32 v0.l, 4.0
 ; SDAG-REAL16-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-REAL16-NEXT:    s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT:    flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT:    global_store_b16 v1, v0, s[0:1]
 ; SDAG-REAL16-NEXT:    s_endpgm
 ;
 ; SDAG-FAKE16-LABEL: tanh_f16_constant_4.0:
@@ -146,7 +146,7 @@ define amdgpu_kernel void @tanh_f16_constant_100.0(ptr addrspace(1) %out) #1 {
 ; SDAG-REAL16-NEXT:    v_tanh_f16_e32 v0.l, 0x5640
 ; SDAG-REAL16-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-REAL16-NEXT:    s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT:    flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT:    global_store_b16 v1, v0, s[0:1]
 ; SDAG-REAL16-NEXT:    s_endpgm
 ;
 ; SDAG-FAKE16-LABEL: tanh_f16_constant_100.0:
@@ -182,7 +182,7 @@ define amdgpu_kernel void @tanh_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
 ; SDAG-REAL16-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-REAL16-NEXT:    s_wait_kmcnt 0x0
 ; SDAG-REAL16-NEXT:    v_tanh_bf16_e32 v0.l, s2
-; SDAG-REAL16-NEXT:    flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT:    global_store_b16 v1, v0, s[0:1]
 ; SDAG-REAL16-NEXT:    s_endpgm
 ;
 ; SDAG-FAKE16-LABEL: tanh_bf16:
@@ -205,7 +205,7 @@ define amdgpu_kernel void @tanh_bf16_constant_4(ptr addrspace(1) %out) #1 {
 ; SDAG-REAL16-NEXT:    v_tanh_bf16_e32 v0.l, 4.0
 ; SDAG-REAL16-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-REAL16-NEXT:    s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT:    flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT:    global_store_b16 v1, v0, s[0:1]
 ; SDAG-REAL16-NEXT:    s_endpgm
 ;
 ; SDAG-FAKE16-LABEL: tanh_bf16_constant_4:
@@ -228,7 +228,7 @@ define amdgpu_kernel void @tanh_bf16_constant_100(ptr addrspace(1) %out) #1 {
 ; SDAG-REAL16-NEXT:    v_tanh_bf16_e32 v0.l, 0x42c8
 ; SDAG-REAL16-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-REAL16-NEXT:    s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT:    flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT:    global_store_b16 v1, v0, s[0:1]
 ; SDAG-REAL16-NEXT:    s_endpgm
 ;
 ; SDAG-FAKE16-LABEL: tanh_bf16_constant_100:
diff --git a/llvm/test/CodeGen/AMDGPU/min.ll b/llvm/test/CodeGen/AMDGPU/min.ll
index 6a3d31f..0458a64 100644
--- a/llvm/test/CodeGen/AMDGPU/min.ll
+++ b/llvm/test/CodeGen/AMDGPU/min.ll
@@ -6,9 +6,7 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck --check-prefixes=GFX11,GFX11-TRUE16 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck --check-prefixes=GFX11,GFX11-FAKE16 %s
-; TODO: FIXME-TRUE16 - Enable this llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-TRUE16 %s
-; Crashing on v_test_imin_slt_i16
-; LLVM ERROR: Cannot select: 0x5f895f65b050: i16,ch = load<(load (s16) from %ir.b.gep, addrspace 1)>
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-TRUE16 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-FAKE16 %s
 
 define amdgpu_kernel void @v_test_imin_sle_i32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
@@ -1482,20 +1480,35 @@ define amdgpu_kernel void @v_test_imin_slt_i16(ptr addrspace(1) %out, ptr addrsp
 ; GFX11-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
 ; GFX11-FAKE16-NEXT:    s_endpgm
 ;
-; GFX1250-LABEL: v_test_imin_slt_i16:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_clause 0x1
-; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
-; GFX1250-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
-; GFX1250-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    s_clause 0x1
-; GFX1250-NEXT:    global_load_u16 v1, v0, s[2:3] scale_offset
-; GFX1250-NEXT:    global_load_u16 v2, v0, s[6:7] scale_offset
-; GFX1250-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-NEXT:    v_min_i16 v1, v1, v2
-; GFX1250-NEXT:    global_store_b16 v0, v1, s[0:1] scale_offset
-; GFX1250-NEXT:    s_endpgm
+; GFX1250-TRUE16-LABEL: v_test_imin_slt_i16:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    s_clause 0x1
+; GFX1250-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-TRUE16-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
+; GFX1250-TRUE16-NEXT:    v_and_b32_e32 v1, 0x3ff, v0
+; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT:    s_clause 0x1
+; GFX1250-TRUE16-NEXT:    global_load_u16 v0, v1, s[2:3] scale_offset
+; GFX1250-TRUE16-NEXT:    global_load_u16 v2, v1, s[6:7] scale_offset
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT:    v_min_i16 v0.l, v0.l, v2.l
+; GFX1250-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1] scale_offset
+; GFX1250-TRUE16-NEXT:    s_endpgm
+;
+; GFX1250-FAKE16-LABEL: v_test_imin_slt_i16:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    s_clause 0x1
+; GFX1250-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-FAKE16-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
+; GFX1250-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1250-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT:    s_clause 0x1
+; GFX1250-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3] scale_offset
+; GFX1250-FAKE16-NEXT:    global_load_u16 v2, v0, s[6:7] scale_offset
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT:    v_min_i16 v1, v1, v2
+; GFX1250-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1] scale_offset
+; GFX1250-FAKE16-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr inbounds i16, ptr addrspace(1) %aptr, i32 %tid
   %b.gep = getelementptr inbounds i16, ptr addrspace(1) %bptr, i32 %tid
@@ -2769,20 +2782,35 @@ define amdgpu_kernel void @v_test_umin_ult_i8(ptr addrspace(1) %out, ptr addrspa
 ; GFX11-FAKE16-NEXT:    global_store_b8 v0, v1, s[0:1]
 ; GFX11-FAKE16-NEXT:    s_endpgm
 ;
-; GFX1250-LABEL: v_test_umin_ult_i8:
-; GFX1250:       ; %bb.0:
-; GFX1250-NEXT:    s_clause 0x1
-; GFX1250-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
-; GFX1250-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
-; GFX1250-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    s_clause 0x1
-; GFX1250-NEXT:    global_load_u8 v1, v0, s[2:3]
-; GFX1250-NEXT:    global_load_u8 v2, v0, s[6:7]
-; GFX1250-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-NEXT:    v_min_u16 v1, v1, v2
-; GFX1250-NEXT:    global_store_b8 v0, v1, s[0:1]
-; GFX1250-NEXT:    s_endpgm
+; GFX1250-TRUE16-LABEL: v_test_umin_ult_i8:
+; GFX1250-TRUE16:       ; %bb.0:
+; GFX1250-TRUE16-NEXT:    s_clause 0x1
+; GFX1250-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-TRUE16-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
+; GFX1250-TRUE16-NEXT:    v_and_b32_e32 v1, 0x3ff, v0
+; GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT:    s_clause 0x1
+; GFX1250-TRUE16-NEXT:    global_load_u8 v0, v1, s[2:3]
+; GFX1250-TRUE16-NEXT:    global_load_u8 v2, v1, s[6:7]
+; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT:    v_min_u16 v0.l, v0.l, v2.l
+; GFX1250-TRUE16-NEXT:    global_store_b8 v1, v0, s[0:1]
+; GFX1250-TRUE16-NEXT:    s_endpgm
+;
+; GFX1250-FAKE16-LABEL: v_test_umin_ult_i8:
+; GFX1250-FAKE16:       ; %bb.0:
+; GFX1250-FAKE16-NEXT:    s_clause 0x1
+; GFX1250-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-FAKE16-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
+; GFX1250-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1250-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT:    s_clause 0x1
+; GFX1250-FAKE16-NEXT:    global_load_u8 v1, v0, s[2:3]
+; GFX1250-FAKE16-NEXT:    global_load_u8 v2, v0, s[6:7]
+; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT:    v_min_u16 v1, v1, v2
+; GFX1250-FAKE16-NEXT:    global_store_b8 v0, v1, s[0:1]
+; GFX1250-FAKE16-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr inbounds i8, ptr addrspace(1) %a.ptr, i32 %tid
   %b.gep = getelementptr inbounds i8, ptr addrspace(1) %b.ptr, i32 %tid
@@ -5069,5 +5097,3 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX1250-FAKE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/minmax.ll b/llvm/test/CodeGen/AMDGPU/minmax.ll
index 57e6943..56f9c5d 100644
--- a/llvm/test/CodeGen/AMDGPU/minmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/minmax.ll
@@ -638,6 +638,14 @@ define void @test_med3_minimumnum_maximumnum_f32(ptr addrspace(1) %arg, float %x
 ; GFX12-NEXT:    v_med3_num_f32 v2, v2, v3, v4
 ; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: test_med3_minimumnum_maximumnum_f32:
+; GFX1250:       ; %bb.0:
+; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT:    s_wait_kmcnt 0x0
+; GFX1250-NEXT:    v_med3_num_f32 v2, v2, v3, v4
+; GFX1250-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX1250-NEXT:    s_set_pc_i64 s[30:31]
   %tmp0 = call float @llvm.minimumnum.f32(float %x, float %y)
   %tmp1 = call float @llvm.maximumnum.f32(float %x, float %y)
   %tmp2 = call float @llvm.minimumnum.f32(float %tmp1, float %z)
@@ -798,7 +806,7 @@ define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b
 ; SDAG-GFX1250-TRUE16-NEXT:    s_mov_b32 s5, s4
 ; SDAG-GFX1250-TRUE16-NEXT:    s_mov_b32 s4, s3
 ; SDAG-GFX1250-TRUE16-NEXT:    v_maxmin_num_f16 v0.l, s0, s1, v0.l
-; SDAG-GFX1250-TRUE16-NEXT:    flat_store_b16 v1, v0, s[4:5]
+; SDAG-GFX1250-TRUE16-NEXT:    global_store_b16 v1, v0, s[4:5]
 ; SDAG-GFX1250-TRUE16-NEXT:    s_endpgm
 ;
 ; SDAG-GFX1250-FAKE16-LABEL: s_test_minmax_f16_ieee_false:
@@ -813,12 +821,12 @@ define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b
 ; GISEL-GFX1250-TRUE16-LABEL: s_test_minmax_f16_ieee_false:
 ; GISEL-GFX1250-TRUE16:       ; %bb.0:
 ; GISEL-GFX1250-TRUE16-NEXT:    s_max_num_f16 s0, s0, s1
+; GISEL-GFX1250-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX1250-TRUE16-NEXT:    s_mov_b32 s6, s3
 ; GISEL-GFX1250-TRUE16-NEXT:    s_mov_b32 s7, s4
-; GISEL-GFX1250-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-GFX1250-TRUE16-NEXT:    s_min_num_f16 s0, s0, s2
-; GISEL-GFX1250-TRUE16-NEXT:    v_mov_b32_e32 v0, s0
-; GISEL-GFX1250-TRUE16-NEXT:    flat_store_b16 v1, v0, s[6:7]
+; GISEL-GFX1250-TRUE16-NEXT:    v_mov_b16_e32 v0.l, s0
+; GISEL-GFX1250-TRUE16-NEXT:    global_store_b16 v1, v0, s[6:7]
 ; GISEL-GFX1250-TRUE16-NEXT:    s_endpgm
 ;
 ; GISEL-GFX1250-FAKE16-LABEL: s_test_minmax_f16_ieee_false:
@@ -1246,7 +1254,7 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0
 ; SDAG-GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
 ; SDAG-GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
 ; SDAG-GFX1250-TRUE16-NEXT:    v_med3_num_f16 v2.l, v2.l, v3.l, v4.l
-; SDAG-GFX1250-TRUE16-NEXT:    flat_store_b16 v[0:1], v2
+; SDAG-GFX1250-TRUE16-NEXT:    global_store_b16 v[0:1], v2, off
 ; SDAG-GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
 ;
 ; SDAG-GFX1250-FAKE16-LABEL: test_med3_f16:
@@ -1262,7 +1270,7 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0
 ; GISEL-GFX1250-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
 ; GISEL-GFX1250-TRUE16-NEXT:    s_wait_kmcnt 0x0
 ; GISEL-GFX1250-TRUE16-NEXT:    v_med3_num_f16 v2.l, v2.l, v3.l, v4.l
-; GISEL-GFX1250-TRUE16-NEXT:    flat_store_b16 v[0:1], v2
+; GISEL-GFX1250-TRUE16-NEXT:    global_store_b16 v[0:1], v2, off
 ; GISEL-GFX1250-TRUE16-NEXT:    s_set_pc_i64 s[30:31]
 ;
 ; GISEL-GFX1250-FAKE16-LABEL: test_med3_f16:
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
index 42469c8..23e90b3 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
@@ -202,13 +202,13 @@ attributes #5 = { "amdgpu-flat-work-group-size"="128,512" }
 attributes #6 = { "amdgpu-flat-work-group-size"="512,512" }
 attributes #7 = { "amdgpu-flat-work-group-size"="64,256" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="512,1024" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR7]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR8]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,1024" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
index 06533b4..0be3147 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
@@ -399,25 +399,25 @@ attributes #17 = { "amdgpu-waves-per-eu"="5,8" }
 attributes #18 = { "amdgpu-waves-per-eu"="9,10" }
 attributes #19 = { "amdgpu-waves-per-eu"="8,9" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,2" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,4" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,1" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR7]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,9" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR8]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR9]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR11]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR12]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR13]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR14]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR15]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR16]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR17]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,5" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR18]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR19]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR20]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,2" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,4" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,1" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,9" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR11]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR12]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR13]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR14]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR15]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR16]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR17]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,5" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR18]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR19]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR20]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll b/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll
index 8930626..33da671 100644
--- a/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll
+++ b/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll
@@ -19,5 +19,5 @@ define void @hoge()  {
   ret void
 }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
index 3dfb0e1..f847d66 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
+++ b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
@@ -191,12 +191,12 @@ define amdgpu_kernel void @kernel_lds_recursion() {
 !1 = !{i32 1, !"amdhsa_code_object_version", i32 400}
 
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-lds-size"="2" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-lds-size"="2" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR3]] = { "amdgpu-lds-size"="4" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-lds-size"="2" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-lds-size"="4" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-lds-size"="2" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-lds-size"="4" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
 ; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
index b9e9893..9a23788 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
@@ -369,7 +369,7 @@ define amdgpu_kernel void @illegal_mfma_after_rewrite() #1 {
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    s_mov_b32 s0, 0
 ; CHECK-NEXT:    s_mov_b32 s1, s0
-; CHECK-NEXT:    v_mov_b64_e32 v[8:9], s[0:1]
+; CHECK-NEXT:    v_mov_b64_e32 v[28:29], s[0:1]
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def s[0:3]
 ; CHECK-NEXT:    ;;#ASMEND
@@ -378,73 +378,66 @@ define amdgpu_kernel void @illegal_mfma_after_rewrite() #1 {
 ; CHECK-NEXT:    v_mov_b64_e32 v[4:5], s[0:1]
 ; CHECK-NEXT:    s_mov_b32 s0, 0x3c003c00
 ; CHECK-NEXT:    s_mov_b32 s1, s0
-; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[0:3], v[8:9], v[8:9], v[4:7]
-; CHECK-NEXT:    v_mov_b64_e32 v[12:13], s[0:1]
+; CHECK-NEXT:    v_mov_b64_e32 v[30:31], s[0:1]
 ; CHECK-NEXT:    s_mov_b32 s0, 0x7e007e00
 ; CHECK-NEXT:    s_mov_b32 s1, s0
-; CHECK-NEXT:    v_mov_b64_e32 v[10:11], s[0:1]
-; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[14:17], v[8:9], v[12:13], v[4:7]
-; CHECK-NEXT:    s_nop 1
-; CHECK-NEXT:    v_accvgpr_write_b32 a0, v0
-; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[18:21], v[8:9], v[10:11], v[4:7]
-; CHECK-NEXT:    v_accvgpr_write_b32 a1, v1
-; CHECK-NEXT:    v_accvgpr_write_b32 a2, v2
-; CHECK-NEXT:    v_accvgpr_write_b32 a3, v3
+; CHECK-NEXT:    v_accvgpr_write_b32 a0, s0
+; CHECK-NEXT:    v_accvgpr_write_b32 a1, s1
+; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[0:3], v[28:29], v[28:29], v[4:7]
+; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[8:11], v[28:29], v[30:31], v[4:7]
+; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[12:15], v[28:29], a[0:1], v[4:7]
+; CHECK-NEXT:    s_nop 2
 ; CHECK-NEXT:    v_mov_b32_e32 v4, 0x7fc00000
 ; CHECK-NEXT:    v_mov_b32_e32 v5, v4
 ; CHECK-NEXT:    v_mov_b32_e32 v6, v4
 ; CHECK-NEXT:    v_mov_b32_e32 v7, v4
-; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[14:17], v[8:9], v[8:9], v[14:17]
+; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[8:11], v[28:29], v[28:29], v[8:11]
 ; CHECK-NEXT:    s_nop 0
-; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[22:25], v[8:9], v[8:9], v[4:7]
+; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[16:19], v[28:29], v[28:29], v[4:7]
 ; CHECK-NEXT:    ;;#ASMSTART
 ; CHECK-NEXT:    ; def v[4:7]
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    s_nop 0
-; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[0:3], v[8:9], v[12:13], v[4:7]
-; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[26:29], v[8:9], v[8:9], v[4:7]
-; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[0:3], v[8:9], v[8:9], v[0:3]
-; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[22:25], v[8:9], v[8:9], v[22:25]
-; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[4:7], v[8:9], v[8:9], v[26:29]
+; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[16:19], v[28:29], v[28:29], v[16:19]
+; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[24:27], v[28:29], v[30:31], v[4:7]
 ; CHECK-NEXT:    s_nop 5
-; CHECK-NEXT:    v_cvt_f16_f32_e32 v23, v14
-; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[14:17], v[8:9], v[8:9], v[18:21]
-; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[0:3], v[12:13], v[8:9], v[0:3]
-; CHECK-NEXT:    s_nop 1
-; CHECK-NEXT:    v_accvgpr_read_b32 v19, a3
-; CHECK-NEXT:    v_accvgpr_read_b32 v18, a2
-; CHECK-NEXT:    v_mov_b64_e32 v[20:21], 0
-; CHECK-NEXT:    s_nop 0
-; CHECK-NEXT:    v_accvgpr_read_b32 v17, a1
-; CHECK-NEXT:    v_accvgpr_read_b32 v16, a0
-; CHECK-NEXT:    v_cvt_f16_f32_e32 v15, v22
-; CHECK-NEXT:    v_cvt_f16_f32_e32 v14, v14
-; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[16:19], v[8:9], v[8:9], v[16:19]
-; CHECK-NEXT:    v_cvt_f16_f32_e32 v12, v0
-; CHECK-NEXT:    global_store_short v[20:21], v23, off
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v17, v8
+; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[8:11], v[28:29], v[28:29], v[12:15]
+; CHECK-NEXT:    s_nop 2
+; CHECK-NEXT:    v_mov_b64_e32 v[12:13], 0
+; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[0:3], v[28:29], v[28:29], v[0:3]
+; CHECK-NEXT:    global_store_short v[12:13], v17, off
 ; CHECK-NEXT:    buffer_wbl2 sc0 sc1
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    buffer_inv sc0 sc1
-; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[0:3], v[10:11], v[8:9], v[4:7]
-; CHECK-NEXT:    global_store_short v[20:21], v15, off
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v9, v16
+; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[20:23], v[28:29], v[28:29], v[4:7]
+; CHECK-NEXT:    global_store_short v[12:13], v9, off
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v1, v8
+; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[8:11], v[28:29], v[28:29], v[24:27]
 ; CHECK-NEXT:    buffer_wbl2 sc0 sc1
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    buffer_inv sc0 sc1
-; CHECK-NEXT:    global_store_short v[20:21], v14, off
-; CHECK-NEXT:    v_cvt_f16_f32_e32 v14, v16
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v14, v0
+; CHECK-NEXT:    global_store_short v[12:13], v1, off
+; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[4:7], v[28:29], v[28:29], v[20:23]
 ; CHECK-NEXT:    buffer_wbl2 sc0 sc1
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    buffer_inv sc0 sc1
-; CHECK-NEXT:    global_store_short v[20:21], v14, off
-; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; CHECK-NEXT:    global_store_short v[12:13], v14, off
 ; CHECK-NEXT:    buffer_wbl2 sc0 sc1
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    buffer_inv sc0 sc1
-; CHECK-NEXT:    global_store_short v[20:21], v12, off
+; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[0:3], v[30:31], v[28:29], v[8:11]
+; CHECK-NEXT:    s_nop 6
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v8, v0
+; CHECK-NEXT:    v_mfma_f32_16x16x16_f16 v[0:3], a[0:1], v[28:29], v[4:7]
+; CHECK-NEXT:    global_store_short v[12:13], v8, off
 ; CHECK-NEXT:    buffer_wbl2 sc0 sc1
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    buffer_inv sc0 sc1
-; CHECK-NEXT:    global_store_short v[20:21], v0, off
+; CHECK-NEXT:    s_nop 2
+; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; CHECK-NEXT:    global_store_short v[12:13], v0, off
 ; CHECK-NEXT:    s_endpgm
 entry:
   %k0 = call <4 x float> asm sideeffect "; def $0", "=s"()
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
index bb22144..9814ed8 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
@@ -1,15 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SAFE %s
-; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-SAFE %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9,GFX9-SAFE %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-TRUE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-FAKE16 %s
-
-; RUN: llc -mtriple=amdgcn -mcpu=hawaii -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=CI,CI-NSZ %s
-; RUN: llc -mtriple=amdgcn -mcpu=fiji -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=VI,VI-NSZ %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX9,GFX9-NSZ %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-TRUE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI %s
+; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
 
 define <2 x half> @add_select_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
 ; CI-LABEL: add_select_fabs_fabs_v2f16:
@@ -63,69 +57,37 @@ define <2 x half> @add_select_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_fabs_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_fabs_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -198,73 +160,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_lhs_fabs_fabs_v2f16(<2 x
 ; GFX9-NEXT:    v_pk_add_f16 v1, v1, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v1, v2, v4
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v5
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v1, v2, v4
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v5
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v1, v2, v4
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v5
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v1, v2, v4
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v5
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v1, v2, v4
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, s0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v5
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v1, v2, v4
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v5
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -328,73 +256,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_store_use_lhs_fabs_fabs_v2f1
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-TRUE16-NEXT:    v_mov_b32_e32 v1, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v1, v2
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-TRUE16-NEXT:    v_mov_b32_e32 v1, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v1, v2
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v1, v2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v0.h, v2.h, s0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v1, v2
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -469,73 +363,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_rhs_fabs_fabs_v2f16(<2 x
 ; GFX9-NEXT:    v_pk_add_f16 v1, v2, v5
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v1, v2, v5
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v1, v3, v5
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v1, v2, v5
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v1, v3, v5
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v1, v2, v5
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v1, v3, v5
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -597,63 +457,34 @@ define <2 x half> @add_select_fabs_var_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_var_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v1.l, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_var_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_var_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v1.l, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_var_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_var_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v1.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_var_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %select = select <2 x i1> %cmp, <2 x half> %fabs.x, <2 x half> %y
@@ -709,61 +540,33 @@ define <2 x half> @add_select_fabs_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_negk_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_negk_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %select = select <2 x i1> %cmp, <2 x half> %fabs, <2 x half> <half -1.0, half -1.0>
@@ -815,61 +618,33 @@ define <2 x half> @add_select_fabs_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x)
 ; GFX9-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xc000
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xc000
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xbc00
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.l, 0xc000, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xc000
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %select)
@@ -920,61 +695,33 @@ define <2 x half> @add_select_posk_posk_v2f16(<2 x i32> %c, <2 x half> %x) {
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_posk_posk_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0x3c00
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.l, 0x4000, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_posk_posk_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v3, 0x4000
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_posk_posk_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0x3c00
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.l, 0x4000, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_posk_posk_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v3, 0x4000
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_posk_posk_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0x3c00
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.l, 0x4000, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_posk_posk_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v3, 0x4000
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %select = select <2 x i1> %cmp, <2 x half> <half 2.0, half 2.0>, <2 x half> <half 1.0, half 1.0>
   %add = fadd <2 x half> %select, %x
@@ -1029,61 +776,33 @@ define <2 x half> @add_select_negk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fabs_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negk_fabs_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fabs_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negk_fabs_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negk_fabs_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v2.h, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negk_fabs_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %select = select <2 x i1> %cmp, <2 x half> <half -1.0, half -1.0>, <2 x half> %fabs
@@ -1140,61 +859,33 @@ define <2 x half> @add_select_negliteralk_fabs_v2f16(<2 x i32> %c, <2 x half> %x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xe400, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xe400, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xe400, v2.h, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %select = select <2 x i1> %cmp, <2 x half> <half -1024.0, half -1024.0>, <2 x half> %fabs
@@ -1250,61 +941,33 @@ define <2 x half> @add_select_fabs_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_posk_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_posk_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_posk_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_posk_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_posk_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_posk_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %select = select <2 x i1> %cmp, <2 x half> %fabs, <2 x half> <half 1.0, half 1.0>
@@ -1360,61 +1023,33 @@ define <2 x half> @add_select_posk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_posk_fabs_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_posk_fabs_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_posk_fabs_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_posk_fabs_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_posk_fabs_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v2.h, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_posk_fabs_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %select = select <2 x i1> %cmp, <2 x half> <half 1.0, half 1.0>, <2 x half> %fabs
@@ -1470,57 +1105,31 @@ define <2 x half> @add_select_fneg_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_fneg_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_fneg_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %fneg.y = fneg <2 x half> %y
@@ -1587,61 +1196,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_lhs_fneg_fneg_v2f16(<2 x
 ; GFX9-NEXT:    v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %fneg.y = fneg <2 x half> %y
@@ -1705,61 +1286,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_store_use_lhs_fneg_fneg_v2f1
 ; GFX9-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    v_xor_b32_e32 v1, 0x80008000, v2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_xor_b32_e32 v1, 0x80008000, v2
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %fneg.y = fneg <2 x half> %y
@@ -1828,61 +1381,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_rhs_fneg_fneg_v2f16(<2 x
 ; GFX9-NEXT:    v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %fneg.y = fneg <2 x half> %y
@@ -1948,63 +1473,34 @@ define <2 x half> @add_select_fneg_var_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_var_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v1.l, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_var_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_var_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v1.l, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_var_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_var_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_xor_b32_e32 v1, 0x80008000, v2
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v1.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_var_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> %y
@@ -2058,55 +1554,30 @@ define <2 x half> @add_select_fneg_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_negk_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_negk_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_negk_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_negk_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_negk_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_negk_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half -1.0, half -1.0>
@@ -2161,55 +1632,30 @@ define <2 x half> @add_select_fneg_inv2pi_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xb118, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xb118, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xb118, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 0xH3118, half 0xH3118>
@@ -2264,55 +1710,30 @@ define <2 x half> @add_select_fneg_neginv2pi_v2f16(<2 x i32> %c, <2 x half> %x,
 ; GFX9-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3118, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3118, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3118, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 0xHB118, half 0xHB118>
@@ -2363,61 +1784,33 @@ define <2 x half> @add_select_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x) {
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negk_negk_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negk_negk_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xc000
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negk_negk_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negk_negk_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xc000
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negk_negk_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xbc00
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.l, 0xc000, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negk_negk_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xc000
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
   %add = fadd <2 x half> %select, %x
@@ -2469,61 +1862,33 @@ define <2 x half> @add_select_negliteralk_negliteralk_v2f16(<2 x i32> %c, <2 x h
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xec00
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.l, 0xe800, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xe800
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xec00
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.l, 0xe800, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xe800
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xec00
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.l, 0xe800, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xe800
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %select = select <2 x i1> %cmp, <2 x half> <half -2048.0, half -2048.0>, <2 x half> <half -4096.0, half -4096.0>
   %add = fadd <2 x half> %select, %x
@@ -2573,61 +1938,33 @@ define <2 x half> @add_select_fneg_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x)
 ; GFX9-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xc000
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xc000
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xbc00
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v3.l, 0xc000, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xc000
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
   %fneg.x = fneg <2 x half> %select
@@ -2681,55 +2018,30 @@ define <2 x half> @add_select_negk_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fneg_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negk_fneg_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fneg_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negk_fneg_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negk_fneg_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negk_fneg_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %select = select <2 x i1> %cmp, <2 x half> <half -1.0, half -1.0>, <2 x half> %fneg.x
@@ -2783,55 +2095,30 @@ define <2 x half> @add_select_fneg_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_posk_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_posk_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_posk_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_posk_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_posk_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_posk_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 1.0, half 1.0>
@@ -2885,55 +2172,30 @@ define <2 x half> @add_select_posk_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_posk_fneg_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_posk_fneg_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_posk_fneg_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_posk_fneg_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_posk_fneg_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_posk_fneg_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %select = select <2 x i1> %cmp, <2 x half> <half 1.0, half 1.0>, <2 x half> %fneg.x
@@ -2997,69 +2259,37 @@ define <2 x half> @add_select_negfabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negfabs_fabs_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_or_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negfabs_fabs_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -3125,69 +2355,37 @@ define <2 x half> @add_select_fabs_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v3
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v3, 0x80008000, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v3
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v3, 0x80008000, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_negfabs_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v3
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_negfabs_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_or_b32_e32 v3, 0x80008000, v3
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -3253,69 +2451,37 @@ define <2 x half> @add_select_neg_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_neg_fabs_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_neg_fabs_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_neg_fabs_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_neg_fabs_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_neg_fabs_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_neg_fabs_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -3380,69 +2546,37 @@ define <2 x half> @add_select_fabs_neg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_neg_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v3
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_neg_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v3, 0x80008000, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_neg_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v3
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_neg_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_xor_b32_e32 v3, 0x80008000, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_neg_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v3
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_neg_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_xor_b32_e32 v3, 0x80008000, v3
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fneg.y = fneg <2 x half> %y
@@ -3501,63 +2635,34 @@ define <2 x half> @add_select_neg_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v1.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v1.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_neg_negfabs_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v1.l, v2.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_neg_negfabs_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -3617,63 +2722,34 @@ define <2 x half> @add_select_negfabs_neg_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v1.l, v3.l, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v1.l, v3.l, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negfabs_neg_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v1.l, v3.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negfabs_neg_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -3735,61 +2811,33 @@ define <2 x half> @mul_select_negfabs_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4400, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4400, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_select_negfabs_posk_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_or_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4400, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_select_negfabs_posk_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -3850,61 +2898,33 @@ define <2 x half> @mul_select_posk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4400, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4400, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_select_posk_negfabs_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4400, v2.h, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_select_posk_negfabs_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -3965,61 +2985,33 @@ define <2 x half> @mul_select_negfabs_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xc400, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xc400, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_select_negfabs_negk_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_or_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xc400, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_select_negfabs_negk_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -4080,61 +3072,33 @@ define <2 x half> @mul_select_negk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xc400, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xc400, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_select_negk_negfabs_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xc400, v2.h, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_select_negk_negfabs_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -4171,115 +3135,63 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <
 ; CI-NEXT:    v_cndmask_b32_e32 v1, 2.0, v2, vcc
 ; CI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
-; VI-SAFE:       ; %bb.0:
-; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x4400
-; VI-SAFE-NEXT:    v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-SAFE-NEXT:    v_add_f16_e32 v2, 4.0, v2
-; VI-SAFE-NEXT:    v_or_b32_e32 v1, v2, v1
-; VI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
-; VI-SAFE-NEXT:    v_mov_b32_e32 v2, 0x4000
-; VI-SAFE-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; VI-SAFE-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX9-SAFE:       ; %bb.0:
-; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-SAFE-NEXT:    v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0]
-; GFX9-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
-; GFX9-SAFE-NEXT:    v_mov_b32_e32 v2, 0x4000
-; GFX9-SAFE-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-SAFE-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-SAFE-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SAFE-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX9-SAFE-NEXT:    v_perm_b32 v0, v1, v0, s4
-; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
-; VI-NSZ:       ; %bb.0:
-; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-NSZ-NEXT:    v_mov_b32_e32 v1, 0xc400
-; VI-NSZ-NEXT:    v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-NSZ-NEXT:    v_sub_f16_e32 v2, -4.0, v2
-; VI-NSZ-NEXT:    v_mov_b32_e32 v3, 0x4000
-; VI-NSZ-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-NSZ-NEXT:    v_cndmask_b32_e64 v0, v3, v2, s[4:5]
-; VI-NSZ-NEXT:    v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NSZ-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX9-NSZ:       ; %bb.0:
-; GFX9-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-NSZ-NEXT:    v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX9-NSZ-NEXT:    v_mov_b32_e32 v2, 0x4000
-; GFX9-NSZ-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-NSZ-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-NSZ-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NSZ-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX9-NSZ-NEXT:    v_perm_b32 v0, v1, v0, s4
-; GFX9-NSZ-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; VI-LABEL: select_fneg_posk_src_add_v2f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT:    v_mov_b32_e32 v1, 0x4400
+; VI-NEXT:    v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT:    v_add_f16_e32 v2, 4.0, v2
+; VI-NEXT:    v_or_b32_e32 v1, v2, v1
+; VI-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; VI-NEXT:    v_mov_b32_e32 v2, 0x4000
+; VI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; VI-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_add_v2f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT:    v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0]
+; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
+; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_add_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_add_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %add = fadd <2 x half> %x, <half 4.0, half 4.0>
   %fneg = fneg <2 x half> %add
@@ -4330,55 +3242,30 @@ define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> %
 ; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %add = fadd nsz <2 x half> %x, <half 4.0, half 4.0>
   %fneg = fneg <2 x half> %add
@@ -4387,153 +3274,86 @@ define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> %
 }
 
 define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
-; CI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
-; CI-SAFE:       ; %bb.0:
-; CI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; CI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT:    v_add_f32_e32 v3, -4.0, v3
-; CI-SAFE-NEXT:    v_add_f32_e32 v2, -4.0, v2
-; CI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; CI-SAFE-NEXT:    v_or_b32_e32 v2, v2, v3
-; CI-SAFE-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
-; CI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v2
-; CI-SAFE-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; CI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT:    v_cndmask_b32_e32 v0, 2.0, v3, vcc
-; CI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-SAFE-NEXT:    v_cndmask_b32_e32 v1, 2.0, v2, vcc
-; CI-SAFE-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
-; VI-SAFE:       ; %bb.0:
-; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-SAFE-NEXT:    v_mov_b32_e32 v1, 0xc400
-; VI-SAFE-NEXT:    v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-SAFE-NEXT:    v_add_f16_e32 v2, -4.0, v2
-; VI-SAFE-NEXT:    v_or_b32_e32 v1, v2, v1
-; VI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
-; VI-SAFE-NEXT:    v_mov_b32_e32 v2, 0x4000
-; VI-SAFE-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; VI-SAFE-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX9-SAFE:       ; %bb.0:
-; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-SAFE-NEXT:    v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0]
-; GFX9-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
-; GFX9-SAFE-NEXT:    v_mov_b32_e32 v2, 0x4000
-; GFX9-SAFE-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-SAFE-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-SAFE-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SAFE-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX9-SAFE-NEXT:    v_perm_b32 v0, v1, v0, s4
-; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; CI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
-; CI-NSZ:       ; %bb.0:
-; CI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; CI-NSZ-NEXT:    v_cvt_f16_f32_e32 v3, v3
-; CI-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
-; CI-NSZ-NEXT:    v_cvt_f32_f16_e32 v3, v3
-; CI-NSZ-NEXT:    v_sub_f32_e32 v2, 4.0, v2
-; CI-NSZ-NEXT:    v_sub_f32_e32 v3, 4.0, v3
-; CI-NSZ-NEXT:    v_cndmask_b32_e32 v0, 2.0, v2, vcc
-; CI-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-NSZ-NEXT:    v_cndmask_b32_e32 v1, 2.0, v3, vcc
-; CI-NSZ-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
-; VI-NSZ:       ; %bb.0:
-; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-NSZ-NEXT:    v_mov_b32_e32 v1, 0x4400
-; VI-NSZ-NEXT:    v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-NSZ-NEXT:    v_sub_f16_e32 v2, 4.0, v2
-; VI-NSZ-NEXT:    v_mov_b32_e32 v3, 0x4000
-; VI-NSZ-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-NSZ-NEXT:    v_cndmask_b32_e64 v0, v3, v2, s[4:5]
-; VI-NSZ-NEXT:    v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NSZ-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX9-NSZ:       ; %bb.0:
-; GFX9-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-NSZ-NEXT:    v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX9-NSZ-NEXT:    v_mov_b32_e32 v2, 0x4000
-; GFX9-NSZ-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-NSZ-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-NSZ-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NSZ-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX9-NSZ-NEXT:    v_perm_b32 v0, v1, v0, s4
-; GFX9-NSZ-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; CI-LABEL: select_fneg_posk_src_sub_v2f16:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT:    v_add_f32_e32 v3, -4.0, v3
+; CI-NEXT:    v_add_f32_e32 v2, -4.0, v2
+; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
+; CI-NEXT:    v_or_b32_e32 v2, v2, v3
+; CI-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; CI-NEXT:    v_cvt_f32_f16_e32 v3, v2
+; CI-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT:    v_cndmask_b32_e32 v0, 2.0, v3, vcc
+; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT:    v_cndmask_b32_e32 v1, 2.0, v2, vcc
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: select_fneg_posk_src_sub_v2f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT:    v_mov_b32_e32 v1, 0xc400
+; VI-NEXT:    v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT:    v_add_f16_e32 v2, -4.0, v2
+; VI-NEXT:    v_or_b32_e32 v1, v2, v1
+; VI-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; VI-NEXT:    v_mov_b32_e32 v2, 0x4000
+; VI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; VI-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_sub_v2f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT:    v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0]
+; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
+; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %add = fsub <2 x half> %x, <half 4.0, half 4.0>
   %fneg = fneg <2 x half> %add
@@ -4541,6 +3361,80 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
   ret <2 x half> %select
 }
 
+define <2 x half> @select_fneg_posk_src_sub_v2f16_nsz(<2 x i32> %c, <2 x half> %x) {
+; CI-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT:    v_sub_f32_e32 v2, 4.0, v2
+; CI-NEXT:    v_sub_f32_e32 v3, 4.0, v3
+; CI-NEXT:    v_cndmask_b32_e32 v0, 2.0, v2, vcc
+; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT:    v_cndmask_b32_e32 v1, 2.0, v3, vcc
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT:    v_mov_b32_e32 v1, 0x4400
+; VI-NEXT:    v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT:    v_sub_f16_e32 v2, 4.0, v2
+; VI-NEXT:    v_mov_b32_e32 v3, 0x4000
+; VI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT:    v_cndmask_b32_e64 v0, v3, v2, s[4:5]
+; VI-NEXT:    v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT:    v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
+; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = icmp eq <2 x i32> %c, zeroinitializer
+  %add = fsub <2 x half> %x, <half 4.0, half 4.0>
+  %fneg = fneg nsz <2 x half> %add
+  %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> <half 2.0, half 2.0>
+  ret <2 x half> %select
+}
+
 define <2 x half> @select_fneg_posk_src_mul_v2f16(<2 x i32> %c, <2 x half> %x) {
 ; CI-LABEL: select_fneg_posk_src_mul_v2f16:
 ; CI:       ; %bb.0:
@@ -4584,55 +3478,30 @@ define <2 x half> @select_fneg_posk_src_mul_v2f16(<2 x i32> %c, <2 x half> %x) {
 ; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %mul = fmul <2 x half> %x, <half 4.0, half 4.0>
   %fneg = fneg <2 x half> %mul
@@ -4668,118 +3537,65 @@ define <2 x half> @select_fneg_posk_src_fma_v2f16(<2 x i32> %c, <2 x half> %x, <
 ; CI-NEXT:    v_cndmask_b32_e32 v1, 2.0, v2, vcc
 ; CI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-SAFE-LABEL: select_fneg_posk_src_fma_v2f16:
-; VI-SAFE:       ; %bb.0:
-; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
-; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; VI-SAFE-NEXT:    v_fma_f16 v1, v4, 4.0, v1
-; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; VI-SAFE-NEXT:    v_fma_f16 v2, v2, 4.0, v3
-; VI-SAFE-NEXT:    v_or_b32_e32 v1, v2, v1
-; VI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
-; VI-SAFE-NEXT:    v_mov_b32_e32 v2, 0x4000
-; VI-SAFE-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; VI-SAFE-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SAFE-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX9-SAFE:       ; %bb.0:
-; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-SAFE-NEXT:    v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX9-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
-; GFX9-SAFE-NEXT:    v_mov_b32_e32 v2, 0x4000
-; GFX9-SAFE-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-SAFE-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-SAFE-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SAFE-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX9-SAFE-NEXT:    v_perm_b32 v0, v1, v0, s4
-; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-NSZ-LABEL: select_fneg_posk_src_fma_v2f16:
-; VI-NSZ:       ; %bb.0:
-; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-NSZ-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
-; VI-NSZ-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; VI-NSZ-NEXT:    v_fma_f16 v1, v4, -4.0, -v1
-; VI-NSZ-NEXT:    v_fma_f16 v2, v2, -4.0, -v3
-; VI-NSZ-NEXT:    v_mov_b32_e32 v3, 0x4000
-; VI-NSZ-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-NSZ-NEXT:    v_cndmask_b32_e64 v0, v3, v2, s[4:5]
-; VI-NSZ-NEXT:    v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NSZ-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-NSZ-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX9-NSZ:       ; %bb.0:
-; GFX9-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-NSZ-NEXT:    v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX9-NSZ-NEXT:    v_mov_b32_e32 v2, 0x4000
-; GFX9-NSZ-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-NSZ-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-NSZ-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NSZ-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX9-NSZ-NEXT:    v_perm_b32 v0, v1, v0, s4
-; GFX9-NSZ-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; VI-LABEL: select_fneg_posk_src_fma_v2f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; VI-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; VI-NEXT:    v_fma_f16 v1, v4, 4.0, v1
+; VI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; VI-NEXT:    v_fma_f16 v2, v2, 4.0, v3
+; VI-NEXT:    v_or_b32_e32 v1, v2, v1
+; VI-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; VI-NEXT:    v_mov_b32_e32 v2, 0x4000
+; VI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; VI-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_fma_v2f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT:    v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
+; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
   %fneg = fneg <2 x half> %fma
@@ -4817,118 +3633,65 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x,
 ; CI-NEXT:    v_cndmask_b32_e32 v1, 2.0, v2, vcc
 ; CI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; VI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
-; VI-SAFE:       ; %bb.0:
-; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
-; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; VI-SAFE-NEXT:    v_fma_f16 v1, v4, 4.0, v1
-; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; VI-SAFE-NEXT:    v_fma_f16 v2, v2, 4.0, v3
-; VI-SAFE-NEXT:    v_or_b32_e32 v1, v2, v1
-; VI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
-; VI-SAFE-NEXT:    v_mov_b32_e32 v2, 0x4000
-; VI-SAFE-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; VI-SAFE-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SAFE-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX9-SAFE:       ; %bb.0:
-; GFX9-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-SAFE-NEXT:    v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX9-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
-; GFX9-SAFE-NEXT:    v_mov_b32_e32 v2, 0x4000
-; GFX9-SAFE-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-SAFE-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-SAFE-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SAFE-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX9-SAFE-NEXT:    v_perm_b32 v0, v1, v0, s4
-; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
-; VI-NSZ:       ; %bb.0:
-; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-NSZ-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
-; VI-NSZ-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; VI-NSZ-NEXT:    v_fma_f16 v1, v4, -4.0, -v1
-; VI-NSZ-NEXT:    v_fma_f16 v2, v2, -4.0, -v3
-; VI-NSZ-NEXT:    v_mov_b32_e32 v3, 0x4000
-; VI-NSZ-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-NSZ-NEXT:    v_cndmask_b32_e64 v0, v3, v2, s[4:5]
-; VI-NSZ-NEXT:    v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NSZ-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX9-NSZ:       ; %bb.0:
-; GFX9-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-NSZ-NEXT:    v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX9-NSZ-NEXT:    v_mov_b32_e32 v2, 0x4000
-; GFX9-NSZ-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-NSZ-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-NSZ-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NSZ-NEXT:    s_mov_b32 s4, 0x5040100
-; GFX9-NSZ-NEXT:    v_perm_b32 v0, v1, v0, s4
-; GFX9-NSZ-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; VI-LABEL: select_fneg_posk_src_fmad_v2f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; VI-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; VI-NEXT:    v_fma_f16 v1, v4, 4.0, v1
+; VI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; VI-NEXT:    v_fma_f16 v2, v2, 4.0, v3
+; VI-NEXT:    v_or_b32_e32 v1, v2, v1
+; VI-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; VI-NEXT:    v_mov_b32_e32 v2, 0x4000
+; VI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; VI-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_fmad_v2f16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT:    v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX9-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
+; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fmad = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
   %fneg = fneg <2 x half> %fmad
@@ -4986,55 +3749,30 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16_nsz(<2 x i32> %c, <2 x half>
 ; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
-; GFX11-SAFE-TRUE16:       ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT:    v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
-; GFX11-SAFE-FAKE16:       ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT:    v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
-; GFX11-NSZ-TRUE16:       ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT:    v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
-; GFX11-NSZ-FAKE16:       ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT:    v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT:    v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fmad = call nsz <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
   %fneg = fneg <2 x half> %fmad
@@ -5049,5 +3787,3 @@ declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX11: {{.*}}
-; GFX11-NSZ: {{.*}}
-; GFX11-SAFE: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
index f1cadea..0868148 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
@@ -63,7 +63,7 @@ define amdgpu_kernel void @foo(ptr noundef %fp) {
 ; OW-NEXT:    ret void
 ;
 ; CW-LABEL: define {{[^@]+}}@foo
-; CW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
+; CW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR0]] {
 ; CW-NEXT:  entry:
 ; CW-NEXT:    [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; CW-NEXT:    store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
@@ -84,7 +84,7 @@ define amdgpu_kernel void @foo(ptr noundef %fp) {
 ; CW-NEXT:    ret void
 ;
 ; NO-LABEL: define {{[^@]+}}@foo
-; NO-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
+; NO-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR0]] {
 ; NO-NEXT:  entry:
 ; NO-NEXT:    [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; NO-NEXT:    store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
@@ -101,14 +101,12 @@ entry:
 }
 
 ;.
-; NO: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; NO: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; NO: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
-; OW: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; OW: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
 ;.
-; CW: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CW: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CW: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
 ; NO: [[META0]] = !{ptr @bar1, ptr @bar2}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
index 775d2f9..8fcaf5e 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -58,7 +58,7 @@ define amdgpu_kernel void @test_simple_indirect_call() {
 
 
 ;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
 ;.
 ; ATTRIBUTOR_GCN: [[META0]] = !{i32 1, i32 5, i32 6, i32 10}
diff --git a/llvm/test/CodeGen/AMDGPU/smfmac_alloc_failure_no_agpr_O0.ll b/llvm/test/CodeGen/AMDGPU/smfmac_alloc_failure_no_agpr_O0.ll
new file mode 100644
index 0000000..ba0fdc68
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/smfmac_alloc_failure_no_agpr_O0.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx950 -amdgpu-mfma-vgpr-form=0 < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx950 -amdgpu-mfma-vgpr-form=1 < %s | FileCheck %s
+
+declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.f16(<8 x half>, <16 x half>, <16 x float>, i32, i32 immarg, i32 immarg)
+
+define amdgpu_kernel void @test_smfmac_f32_32x32x32_f16__vgpr(ptr addrspace(1) %arg, <8 x half> %a, <16 x half> %b, i32 %idx) #0 {
+; CHECK-LABEL: test_smfmac_f32_32x32x32_f16__vgpr:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[4:5]
+; CHECK-NEXT:    v_mov_b32_e32 v1, v0
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
+; CHECK-NEXT:    s_load_dwordx4 s[12:15], s[2:3], 0x34
+; CHECK-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x44
+; CHECK-NEXT:    s_nop 0
+; CHECK-NEXT:    s_load_dword s2, s[2:3], 0x64
+; CHECK-NEXT:    s_mov_b32 s3, 0x3ff
+; CHECK-NEXT:    v_and_b32_e64 v1, v1, s3
+; CHECK-NEXT:    s_mov_b32 s3, 6
+; CHECK-NEXT:    v_lshlrev_b32_e64 v8, s3, v1
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    global_load_dwordx4 v[4:7], v8, s[0:1] offset:48
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v1, v7
+; CHECK-NEXT:    v_mov_b32_e32 v2, v6
+; CHECK-NEXT:    v_mov_b32_e32 v3, v5
+; CHECK-NEXT:    ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
+; CHECK-NEXT:    global_load_dwordx4 v[10:13], v8, s[0:1] offset:32
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v5, v13
+; CHECK-NEXT:    v_mov_b32_e32 v6, v12
+; CHECK-NEXT:    v_mov_b32_e32 v7, v11
+; CHECK-NEXT:    v_mov_b32_e32 v24, v10
+; CHECK-NEXT:    global_load_dwordx4 v[10:13], v8, s[0:1] offset:16
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v25, v13
+; CHECK-NEXT:    v_mov_b32_e32 v26, v12
+; CHECK-NEXT:    v_mov_b32_e32 v27, v11
+; CHECK-NEXT:    v_mov_b32_e32 v28, v10
+; CHECK-NEXT:    global_load_dwordx4 v[8:11], v8, s[0:1]
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v29, v11
+; CHECK-NEXT:    v_mov_b32_e32 v30, v10
+; CHECK-NEXT:    v_mov_b32_e32 v31, v9
+; CHECK-NEXT:    ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec
+; CHECK-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 killed $exec
+; CHECK-NEXT:    v_mov_b32_e32 v9, v31
+; CHECK-NEXT:    v_mov_b32_e32 v10, v30
+; CHECK-NEXT:    v_mov_b32_e32 v11, v29
+; CHECK-NEXT:    v_mov_b32_e32 v12, v28
+; CHECK-NEXT:    v_mov_b32_e32 v13, v27
+; CHECK-NEXT:    v_mov_b32_e32 v14, v26
+; CHECK-NEXT:    v_mov_b32_e32 v15, v25
+; CHECK-NEXT:    v_mov_b32_e32 v16, v24
+; CHECK-NEXT:    v_mov_b32_e32 v17, v7
+; CHECK-NEXT:    v_mov_b32_e32 v18, v6
+; CHECK-NEXT:    v_mov_b32_e32 v19, v5
+; CHECK-NEXT:    v_mov_b32_e32 v20, v4
+; CHECK-NEXT:    v_mov_b32_e32 v21, v3
+; CHECK-NEXT:    v_mov_b32_e32 v22, v2
+; CHECK-NEXT:    v_mov_b32_e32 v23, v1
+; CHECK-NEXT:    v_mov_b64_e32 v[2:3], s[12:13]
+; CHECK-NEXT:    v_mov_b64_e32 v[4:5], s[14:15]
+; CHECK-NEXT:    v_mov_b64_e32 v[30:31], s[10:11]
+; CHECK-NEXT:    v_mov_b64_e32 v[28:29], s[8:9]
+; CHECK-NEXT:    v_mov_b64_e32 v[26:27], s[6:7]
+; CHECK-NEXT:    v_mov_b64_e32 v[24:25], s[4:5]
+; CHECK-NEXT:    v_mov_b32_e32 v1, s2
+; CHECK-NEXT:    s_nop 1
+; CHECK-NEXT:    v_smfmac_f32_32x32x32_f16 v[8:23], v[2:5], v[24:31], v1 cbsz:1 abid:2
+; CHECK-NEXT:    s_nop 11
+; CHECK-NEXT:    v_mov_b32_e32 v1, v23
+; CHECK-NEXT:    v_mov_b32_e32 v6, v22
+; CHECK-NEXT:    v_mov_b32_e32 v7, v21
+; CHECK-NEXT:    v_mov_b32_e32 v2, v20
+; CHECK-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec
+; CHECK-NEXT:    v_mov_b32_e32 v3, v7
+; CHECK-NEXT:    v_mov_b32_e32 v4, v6
+; CHECK-NEXT:    v_mov_b32_e32 v5, v1
+; CHECK-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:48
+; CHECK-NEXT:    v_mov_b32_e32 v1, v19
+; CHECK-NEXT:    v_mov_b32_e32 v6, v18
+; CHECK-NEXT:    v_mov_b32_e32 v7, v17
+; CHECK-NEXT:    v_mov_b32_e32 v2, v16
+; CHECK-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec
+; CHECK-NEXT:    v_mov_b32_e32 v3, v7
+; CHECK-NEXT:    v_mov_b32_e32 v4, v6
+; CHECK-NEXT:    v_mov_b32_e32 v5, v1
+; CHECK-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:32
+; CHECK-NEXT:    v_mov_b32_e32 v1, v15
+; CHECK-NEXT:    v_mov_b32_e32 v6, v14
+; CHECK-NEXT:    v_mov_b32_e32 v7, v13
+; CHECK-NEXT:    v_mov_b32_e32 v2, v12
+; CHECK-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec
+; CHECK-NEXT:    v_mov_b32_e32 v3, v7
+; CHECK-NEXT:    v_mov_b32_e32 v4, v6
+; CHECK-NEXT:    v_mov_b32_e32 v5, v1
+; CHECK-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1] offset:16
+; CHECK-NEXT:    v_mov_b32_e32 v1, v11
+; CHECK-NEXT:    v_mov_b32_e32 v6, v10
+; CHECK-NEXT:    v_mov_b32_e32 v7, v9
+; CHECK-NEXT:    v_mov_b32_e32 v2, v8
+; CHECK-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec
+; CHECK-NEXT:    v_mov_b32_e32 v3, v7
+; CHECK-NEXT:    v_mov_b32_e32 v4, v6
+; CHECK-NEXT:    v_mov_b32_e32 v5, v1
+; CHECK-NEXT:    global_store_dwordx4 v0, v[2:5], s[0:1]
+; CHECK-NEXT:    s_endpgm
+bb:
+  %id = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr <16 x float>, ptr addrspace(1) %arg, i32 %id
+  %in.1 = load <16 x float>, ptr addrspace(1) %gep
+  %mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.f16(<8 x half> %a, <16 x half> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2)
+  store <16 x float> %mai.1, ptr addrspace(1) %arg
+  ret void
+}
+
+attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-agpr-alloc"="0,0" }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
index a1557418..8dfd3b7 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
@@ -31,5 +31,5 @@ define amdgpu_kernel void @kernel1() #1 {
 
 attributes #0 = { "uniform-work-group-size"="true" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
index fb225a9..fa01ee9 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
@@ -98,7 +98,7 @@ define amdgpu_kernel void @kernel2() #0 {
 attributes #0 = { "uniform-work-group-size"="true" }
 ;.
 ; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
index cfede0c..09001ca 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
@@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel3() #2 {
 
 attributes #2 = { "uniform-work-group-size"="true" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
index 854b724..4dede21 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
@@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel2() #2 {
 
 attributes #1 = { "uniform-work-group-size"="true" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
index c4e0a60..08e1556 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
@@ -52,8 +52,8 @@ attributes #0 = { nounwind }
 attributes #1 = { "uniform-work-group-size"="false" }
 attributes #2 = { "uniform-work-group-size"="true" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR2]] = { nounwind "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR3]] = { "uniform-work-group-size"="true" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
index 05af74d..9090d605 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
@@ -101,7 +101,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %m) #1 {
 attributes #0 = { nounwind readnone }
 attributes #1 = { "uniform-work-group-size"="true" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
index cdbca7f..5e109f4 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
@@ -61,5 +61,5 @@ define amdgpu_kernel void @kernel3() #0 {
 
 attributes #0 = { "uniform-work-group-size"="false" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
index 77eeb34..4dd8af0 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
@@ -447,7 +447,7 @@ body:             |
     ; CHECK-LABEL: name: test_vnmuls
     ; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
     ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
-    ; CHECK: [[VNMULS:%[0-9]+]]:spr = VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+    ; CHECK: [[VNMULS:%[0-9]+]]:spr = nofpexcept VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
     ; CHECK: $s0 = COPY [[VNMULS]]
     ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
     %0(s32) = COPY $s0
@@ -477,7 +477,7 @@ body:             |
     ; CHECK-LABEL: name: test_vnmuls_reassociate
     ; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
     ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
-    ; CHECK: [[VNMULS:%[0-9]+]]:spr = VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+    ; CHECK: [[VNMULS:%[0-9]+]]:spr = nofpexcept VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
     ; CHECK: $s0 = COPY [[VNMULS]]
     ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
     %0(s32) = COPY $s0
@@ -507,7 +507,7 @@ body:             |
     ; CHECK-LABEL: name: test_vnmuld
     ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
     ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
-    ; CHECK: [[VNMULD:%[0-9]+]]:dpr = VNMULD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+    ; CHECK: [[VNMULD:%[0-9]+]]:dpr = nofpexcept VNMULD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
     ; CHECK: $d0 = COPY [[VNMULD]]
     ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
     %0(s64) = COPY $d0
@@ -539,7 +539,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
     ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
     ; CHECK: [[COPY2:%[0-9]+]]:spr = COPY $s2
-    ; CHECK: [[VFNMAS:%[0-9]+]]:spr = VFNMAS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+    ; CHECK: [[VFNMAS:%[0-9]+]]:spr = nofpexcept VFNMAS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
     ; CHECK: $s0 = COPY [[VFNMAS]]
     ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
     %0(s32) = COPY $s0
@@ -573,7 +573,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
     ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
     ; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
-    ; CHECK: [[VFNMAD:%[0-9]+]]:dpr = VFNMAD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+    ; CHECK: [[VFNMAD:%[0-9]+]]:dpr = nofpexcept VFNMAD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
     ; CHECK: $d0 = COPY [[VFNMAD]]
     ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
     %0(s64) = COPY $d0
@@ -607,7 +607,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
     ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
     ; CHECK: [[COPY2:%[0-9]+]]:spr = COPY $s2
-    ; CHECK: [[VFMSS:%[0-9]+]]:spr = VFMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+    ; CHECK: [[VFMSS:%[0-9]+]]:spr = nofpexcept VFMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
     ; CHECK: $s0 = COPY [[VFMSS]]
     ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
     %0(s32) = COPY $s0
@@ -640,7 +640,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
     ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
     ; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
-    ; CHECK: [[VFMSD:%[0-9]+]]:dpr = VFMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg
+    ; CHECK: [[VFMSD:%[0-9]+]]:dpr = nofpexcept VFMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr
     ; CHECK: $d0 = COPY [[VFMSD]]
     ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
     %0(s64) = COPY $d0
@@ -673,7 +673,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
     ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
     ; CHECK: [[COPY2:%[0-9]+]]:spr = COPY $s2
-    ; CHECK: [[VFNMSS:%[0-9]+]]:spr = VFNMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+    ; CHECK: [[VFNMSS:%[0-9]+]]:spr = nofpexcept VFNMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
     ; CHECK: $s0 = COPY [[VFNMSS]]
     ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
     %0(s32) = COPY $s0
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir
index 45a846b..4cded13 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir
@@ -19,7 +19,7 @@ body:             |
   bb.1:
     ; CHECK-LABEL: name: test_fptosi
     ; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
-    ; CHECK: [[VTOSIZS:%[0-9]+]]:spr = VTOSIZS [[COPY]], 14 /* CC::al */, $noreg
+    ; CHECK: [[VTOSIZS:%[0-9]+]]:spr = nofpexcept VTOSIZS [[COPY]], 14 /* CC::al */, $noreg
     ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOSIZS]]
     ; CHECK: $r0 = COPY [[COPY1]]
     ; CHECK: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir b/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir
index ec834f1..4517fe6 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -O0 -mtriple arm-- -mattr=+vfp4,-neonfp -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
 # RUN: llc -O0 -mtriple thumb-- -mattr=+v6t2,+vfp4,-neonfp -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
 --- |
@@ -76,11 +77,9 @@ body:             |
 ...
 ---
 name:            test_fadd_s32
-# CHECK-LABEL: name: test_fadd_s32
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: fprb }
@@ -89,28 +88,29 @@ body:             |
   bb.0:
     liveins: $s0, $s1
 
+    ; CHECK-LABEL: name: test_fadd_s32
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+    ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    ; CHECK-NEXT: $s0 = COPY [[VADDS]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
     %0(s32) = COPY $s0
-    ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
 
     %1(s32) = COPY $s1
-    ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
 
     %2(s32) = G_FADD %0, %1
-    ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VADDS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
 
     $s0 = COPY %2(s32)
-    ; CHECK: $s0 = COPY [[VREGSUM]]
 
     BX_RET 14, $noreg, implicit $s0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
 ...
 ---
 name:            test_fadd_s64
-# CHECK-LABEL: name: test_fadd_s64
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: fprb }
@@ -119,28 +119,29 @@ body:             |
   bb.0:
     liveins: $d0, $d1
 
+    ; CHECK-LABEL: name: test_fadd_s64
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+    ; CHECK-NEXT: [[VADDD:%[0-9]+]]:dpr = nofpexcept VADDD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    ; CHECK-NEXT: $d0 = COPY [[VADDD]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
     %0(s64) = COPY $d0
-    ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
 
     %1(s64) = COPY $d1
-    ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
 
     %2(s64) = G_FADD %0, %1
-    ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VADDD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
 
     $d0 = COPY %2(s64)
-    ; CHECK: $d0 = COPY [[VREGSUM]]
 
     BX_RET 14, $noreg, implicit $d0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
 ...
 ---
 name:            test_fsub_s32
-# CHECK-LABEL: name: test_fsub_s32
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: fprb }
@@ -149,28 +150,29 @@ body:             |
   bb.0:
     liveins: $s0, $s1
 
+    ; CHECK-LABEL: name: test_fsub_s32
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+    ; CHECK-NEXT: [[VSUBS:%[0-9]+]]:spr = nofpexcept VSUBS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    ; CHECK-NEXT: $s0 = COPY [[VSUBS]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
     %0(s32) = COPY $s0
-    ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
 
     %1(s32) = COPY $s1
-    ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
 
     %2(s32) = G_FSUB %0, %1
-    ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VSUBS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
 
     $s0 = COPY %2(s32)
-    ; CHECK: $s0 = COPY [[VREGSUM]]
 
     BX_RET 14, $noreg, implicit $s0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
 ...
 ---
 name:            test_fsub_s64
-# CHECK-LABEL: name: test_fsub_s64
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: fprb }
@@ -179,28 +181,29 @@ body:             |
   bb.0:
     liveins: $d0, $d1
 
+    ; CHECK-LABEL: name: test_fsub_s64
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+    ; CHECK-NEXT: [[VSUBD:%[0-9]+]]:dpr = nofpexcept VSUBD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    ; CHECK-NEXT: $d0 = COPY [[VSUBD]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
     %0(s64) = COPY $d0
-    ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
 
     %1(s64) = COPY $d1
-    ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
 
     %2(s64) = G_FSUB %0, %1
-    ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VSUBD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
 
     $d0 = COPY %2(s64)
-    ; CHECK: $d0 = COPY [[VREGSUM]]
 
     BX_RET 14, $noreg, implicit $d0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
 ...
 ---
 name:            test_fmul_s32
-# CHECK-LABEL: name: test_fmul_s32
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: fprb }
@@ -209,28 +212,29 @@ body:             |
   bb.0:
     liveins: $s0, $s1
 
+    ; CHECK-LABEL: name: test_fmul_s32
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+    ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nofpexcept VMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    ; CHECK-NEXT: $s0 = COPY [[VMULS]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
     %0(s32) = COPY $s0
-    ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
 
     %1(s32) = COPY $s1
-    ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
 
     %2(s32) = G_FMUL %0, %1
-    ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VMULS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
 
     $s0 = COPY %2(s32)
-    ; CHECK: $s0 = COPY [[VREGSUM]]
 
     BX_RET 14, $noreg, implicit $s0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
 ...
 ---
 name:            test_fmul_s64
-# CHECK-LABEL: name: test_fmul_s64
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: fprb }
@@ -239,28 +243,29 @@ body:             |
   bb.0:
     liveins: $d0, $d1
 
+    ; CHECK-LABEL: name: test_fmul_s64
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+    ; CHECK-NEXT: [[VMULD:%[0-9]+]]:dpr = nofpexcept VMULD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    ; CHECK-NEXT: $d0 = COPY [[VMULD]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
     %0(s64) = COPY $d0
-    ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
 
     %1(s64) = COPY $d1
-    ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
 
     %2(s64) = G_FMUL %0, %1
-    ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VMULD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
 
     $d0 = COPY %2(s64)
-    ; CHECK: $d0 = COPY [[VREGSUM]]
 
     BX_RET 14, $noreg, implicit $d0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
 ...
 ---
 name:            test_fdiv_s32
-# CHECK-LABEL: name: test_fdiv_s32
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: fprb }
@@ -269,28 +274,29 @@ body:             |
   bb.0:
     liveins: $s0, $s1
 
+    ; CHECK-LABEL: name: test_fdiv_s32
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+    ; CHECK-NEXT: [[VDIVS:%[0-9]+]]:spr = nofpexcept VDIVS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    ; CHECK-NEXT: $s0 = COPY [[VDIVS]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
     %0(s32) = COPY $s0
-    ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
 
     %1(s32) = COPY $s1
-    ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
 
     %2(s32) = G_FDIV %0, %1
-    ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VDIVS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
 
     $s0 = COPY %2(s32)
-    ; CHECK: $s0 = COPY [[VREGSUM]]
 
     BX_RET 14, $noreg, implicit $s0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
 ...
 ---
 name:            test_fdiv_s64
-# CHECK-LABEL: name: test_fdiv_s64
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: fprb }
@@ -299,28 +305,29 @@ body:             |
   bb.0:
     liveins: $d0, $d1
 
+    ; CHECK-LABEL: name: test_fdiv_s64
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+    ; CHECK-NEXT: [[VDIVD:%[0-9]+]]:dpr = nofpexcept VDIVD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    ; CHECK-NEXT: $d0 = COPY [[VDIVD]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
     %0(s64) = COPY $d0
-    ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
 
     %1(s64) = COPY $d1
-    ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
 
     %2(s64) = G_FDIV %0, %1
-    ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VDIVD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
 
     $d0 = COPY %2(s64)
-    ; CHECK: $d0 = COPY [[VREGSUM]]
 
     BX_RET 14, $noreg, implicit $d0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
 ...
 ---
 name:            test_fneg_s32
-# CHECK-LABEL: name: test_fneg_s32
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: fprb }
@@ -328,25 +335,26 @@ body:             |
   bb.0:
     liveins: $s0
 
+    ; CHECK-LABEL: name: test_fneg_s32
+    ; CHECK: liveins: $s0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+    ; CHECK-NEXT: [[VNEGS:%[0-9]+]]:spr = VNEGS [[COPY]], 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: $s0 = COPY [[VNEGS]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
     %0(s32) = COPY $s0
-    ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
 
     %1(s32) = G_FNEG %0
-    ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VNEGS [[VREGX]], 14 /* CC::al */, $noreg
 
     $s0 = COPY %1(s32)
-    ; CHECK: $s0 = COPY [[VREGSUM]]
 
     BX_RET 14, $noreg, implicit $s0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
 ...
 ---
 name:            test_fneg_s64
-# CHECK-LABEL: name: test_fneg_s64
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: fprb }
@@ -355,25 +363,26 @@ body:             |
   bb.0:
     liveins: $d0
 
+    ; CHECK-LABEL: name: test_fneg_s64
+    ; CHECK: liveins: $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+    ; CHECK-NEXT: [[VNEGD:%[0-9]+]]:dpr = VNEGD [[COPY]], 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: $d0 = COPY [[VNEGD]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
     %0(s64) = COPY $d0
-    ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
 
     %1(s64) = G_FNEG %0
-    ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VNEGD [[VREGX]], 14 /* CC::al */, $noreg
 
     $d0 = COPY %1(s64)
-    ; CHECK: $d0 = COPY [[VREGSUM]]
 
     BX_RET 14, $noreg, implicit $d0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
 ...
 ---
 name:            test_fma_s32
-# CHECK-LABEL: name: test_fma_s32
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: fprb }
@@ -383,31 +392,32 @@ body:             |
   bb.0:
     liveins: $s0, $s1, $s2
 
+    ; CHECK-LABEL: name: test_fma_s32
+    ; CHECK: liveins: $s0, $s1, $s2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:spr = COPY $s2
+    ; CHECK-NEXT: [[VFMAS:%[0-9]+]]:spr = nofpexcept VFMAS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    ; CHECK-NEXT: $s0 = COPY [[VFMAS]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
     %0(s32) = COPY $s0
-    ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
 
     %1(s32) = COPY $s1
-    ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
 
     %2(s32) = COPY $s2
-    ; CHECK: [[VREGZ:%[0-9]+]]:spr = COPY $s2
 
     %3(s32) = G_FMA %0, %1, %2
-    ; CHECK: [[VREGR:%[0-9]+]]:spr = VFMAS [[VREGZ]], [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
 
     $s0 = COPY %3(s32)
-    ; CHECK: $s0 = COPY [[VREGR]]
 
     BX_RET 14, $noreg, implicit $s0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
 ...
 ---
 name:            test_fma_s64
-# CHECK-LABEL: name: test_fma_s64
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: fprb }
@@ -417,31 +427,32 @@ body:             |
   bb.0:
     liveins: $d0, $d1, $d2
 
+    ; CHECK-LABEL: name: test_fma_s64
+    ; CHECK: liveins: $d0, $d1, $d2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:dpr = COPY $d2
+    ; CHECK-NEXT: [[VFMAD:%[0-9]+]]:dpr = nofpexcept VFMAD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    ; CHECK-NEXT: $d0 = COPY [[VFMAD]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
     %0(s64) = COPY $d0
-    ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
 
     %1(s64) = COPY $d1
-    ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
 
     %2(s64) = COPY $d2
-    ; CHECK: [[VREGZ:%[0-9]+]]:dpr = COPY $d2
 
     %3(s64) = G_FMA %0, %1, %2
-    ; CHECK: [[VREGR:%[0-9]+]]:dpr = VFMAD [[VREGZ]], [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
 
     $d0 = COPY %3(s64)
-    ; CHECK: $d0 = COPY [[VREGR]]
 
     BX_RET 14, $noreg, implicit $d0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
 ...
 ---
 name:            test_fpext_s32_to_s64
-# CHECK-LABEL: name: test_fpext_s32_to_s64
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: fprb }
@@ -449,25 +460,26 @@ body:             |
   bb.0:
     liveins: $s0
 
+    ; CHECK-LABEL: name: test_fpext_s32_to_s64
+    ; CHECK: liveins: $s0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+    ; CHECK-NEXT: [[VCVTDS:%[0-9]+]]:dpr = nofpexcept VCVTDS [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    ; CHECK-NEXT: $d0 = COPY [[VCVTDS]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
     %0(s32) = COPY $s0
-    ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
 
     %1(s64) = G_FPEXT %0(s32)
-    ; CHECK: [[VREGR:%[0-9]+]]:dpr = VCVTDS [[VREGX]], 14 /* CC::al */, $noreg
 
     $d0 = COPY %1(s64)
-    ; CHECK: $d0 = COPY [[VREGR]]
 
     BX_RET 14, $noreg, implicit $d0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
 ...
 ---
 name:            test_fptrunc_s64_to_s32
-# CHECK-LABEL: name: test_fptrunc_s64_to_s32
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: fprb }
@@ -475,25 +487,26 @@ body:             |
   bb.0:
     liveins: $d0
 
+    ; CHECK-LABEL: name: test_fptrunc_s64_to_s32
+    ; CHECK: liveins: $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+    ; CHECK-NEXT: [[VCVTSD:%[0-9]+]]:spr = nofpexcept VCVTSD [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    ; CHECK-NEXT: $s0 = COPY [[VCVTSD]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
     %0(s64) = COPY $d0
-    ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
 
     %1(s32) = G_FPTRUNC %0(s64)
-    ; CHECK: [[VREGR:%[0-9]+]]:spr = VCVTSD [[VREGX]], 14 /* CC::al */, $noreg
 
     $s0 = COPY %1(s32)
-    ; CHECK: $s0 = COPY [[VREGR]]
 
     BX_RET 14, $noreg, implicit $s0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
 ...
 ---
 name:            test_fptosi_s32
-# CHECK-LABEL: name: test_fptosi_s32
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: gprb }
@@ -501,26 +514,27 @@ body:             |
   bb.0:
     liveins: $s0
 
+    ; CHECK-LABEL: name: test_fptosi_s32
+    ; CHECK: liveins: $s0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+    ; CHECK-NEXT: [[VTOSIZS:%[0-9]+]]:spr = nofpexcept VTOSIZS [[COPY]], 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOSIZS]]
+    ; CHECK-NEXT: $r0 = COPY [[COPY1]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0(s32) = COPY $s0
-    ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
 
     %1(s32) = G_FPTOSI %0(s32)
-    ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOSIZS [[VREGX]], 14 /* CC::al */, $noreg
-    ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]]
 
     $r0 = COPY %1(s32)
-    ; CHECK: $r0 = COPY [[VREGR]]
 
     BX_RET 14, $noreg, implicit $r0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0
 ...
 ---
 name:            test_fptosi_s64
-# CHECK-LABEL: name: test_fptosi_s64
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: gprb }
@@ -528,26 +542,27 @@ body:             |
   bb.0:
     liveins: $d0
 
+    ; CHECK-LABEL: name: test_fptosi_s64
+    ; CHECK: liveins: $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+    ; CHECK-NEXT: [[VTOSIZD:%[0-9]+]]:spr = nofpexcept VTOSIZD [[COPY]], 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOSIZD]]
+    ; CHECK-NEXT: $r0 = COPY [[COPY1]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0(s64) = COPY $d0
-    ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
 
     %1(s32) = G_FPTOSI %0(s64)
-    ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOSIZD [[VREGX]], 14 /* CC::al */, $noreg
-    ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]]
 
     $r0 = COPY %1(s32)
-    ; CHECK: $r0 = COPY [[VREGR]]
 
     BX_RET 14, $noreg, implicit $r0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0
 ...
 ---
 name:            test_fptoui_s32
-# CHECK-LABEL: name: test_fptoui_s32
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: gprb }
@@ -555,26 +570,27 @@ body:             |
   bb.0:
     liveins: $s0
 
+    ; CHECK-LABEL: name: test_fptoui_s32
+    ; CHECK: liveins: $s0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+    ; CHECK-NEXT: [[VTOUIZS:%[0-9]+]]:spr = nofpexcept VTOUIZS [[COPY]], 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOUIZS]]
+    ; CHECK-NEXT: $r0 = COPY [[COPY1]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0(s32) = COPY $s0
-    ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
 
     %1(s32) = G_FPTOUI %0(s32)
-    ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOUIZS [[VREGX]], 14 /* CC::al */, $noreg
-    ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]]
 
     $r0 = COPY %1(s32)
-    ; CHECK: $r0 = COPY [[VREGR]]
 
     BX_RET 14, $noreg, implicit $r0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0
 ...
 ---
 name:            test_fptoui_s64
-# CHECK-LABEL: name: test_fptoui_s64
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: fprb }
   - { id: 1, class: gprb }
@@ -582,26 +598,27 @@ body:             |
   bb.0:
     liveins: $d0
 
+    ; CHECK-LABEL: name: test_fptoui_s64
+    ; CHECK: liveins: $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+    ; CHECK-NEXT: [[VTOUIZD:%[0-9]+]]:spr = nofpexcept VTOUIZD [[COPY]], 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOUIZD]]
+    ; CHECK-NEXT: $r0 = COPY [[COPY1]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0
     %0(s64) = COPY $d0
-    ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
 
     %1(s32) = G_FPTOUI %0(s64)
-    ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOUIZD [[VREGX]], 14 /* CC::al */, $noreg
-    ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]]
 
     $r0 = COPY %1(s32)
-    ; CHECK: $r0 = COPY [[VREGR]]
 
     BX_RET 14, $noreg, implicit $r0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0
 ...
 ---
 name:            test_sitofp_s32
-# CHECK-LABEL: name: test_sitofp_s32
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: gprb }
   - { id: 1, class: fprb }
@@ -609,26 +626,27 @@ body:             |
   bb.0:
     liveins: $r0
 
+    ; CHECK-LABEL: name: test_sitofp_s32
+    ; CHECK: liveins: $r0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]]
+    ; CHECK-NEXT: [[VSITOS:%[0-9]+]]:spr = nofpexcept VSITOS [[COPY1]], 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: $s0 = COPY [[VSITOS]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
     %0(s32) = COPY $r0
-    ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0
 
     %1(s32) = G_SITOFP %0(s32)
-    ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]]
-    ; CHECK: [[VREGR:%[0-9]+]]:spr = VSITOS [[VREGF]], 14 /* CC::al */, $noreg
 
     $s0 = COPY %1(s32)
-    ; CHECK: $s0 = COPY [[VREGR]]
 
     BX_RET 14, $noreg, implicit $s0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
 ...
 ---
 name:            test_sitofp_s64
-# CHECK-LABEL: name: test_sitofp_s64
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: gprb }
   - { id: 1, class: fprb }
@@ -636,26 +654,27 @@ body:             |
   bb.0:
     liveins: $r0
 
+    ; CHECK-LABEL: name: test_sitofp_s64
+    ; CHECK: liveins: $r0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]]
+    ; CHECK-NEXT: [[VSITOD:%[0-9]+]]:dpr = nofpexcept VSITOD [[COPY1]], 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: $d0 = COPY [[VSITOD]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
     %0(s32) = COPY $r0
-    ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0
 
     %1(s64) = G_SITOFP %0(s32)
-    ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]]
-    ; CHECK: [[VREGR:%[0-9]+]]:dpr = VSITOD [[VREGF]], 14 /* CC::al */, $noreg
 
     $d0 = COPY %1(s64)
-    ; CHECK: $d0 = COPY [[VREGR]]
 
     BX_RET 14, $noreg, implicit $d0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
 ...
 ---
 name:            test_uitofp_s32
-# CHECK-LABEL: name: test_uitofp_s32
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: gprb }
   - { id: 1, class: fprb }
@@ -663,26 +682,27 @@ body:             |
   bb.0:
     liveins: $r0
 
+    ; CHECK-LABEL: name: test_uitofp_s32
+    ; CHECK: liveins: $r0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]]
+    ; CHECK-NEXT: [[VUITOS:%[0-9]+]]:spr = nofpexcept VUITOS [[COPY1]], 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: $s0 = COPY [[VUITOS]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
     %0(s32) = COPY $r0
-    ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0
 
     %1(s32) = G_UITOFP %0(s32)
-    ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]]
-    ; CHECK: [[VREGR:%[0-9]+]]:spr = VUITOS [[VREGF]], 14 /* CC::al */, $noreg
 
     $s0 = COPY %1(s32)
-    ; CHECK: $s0 = COPY [[VREGR]]
 
     BX_RET 14, $noreg, implicit $s0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
 ...
 ---
 name:            test_uitofp_s64
-# CHECK-LABEL: name: test_uitofp_s64
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: gprb }
   - { id: 1, class: fprb }
@@ -690,26 +710,27 @@ body:             |
   bb.0:
     liveins: $r0
 
+    ; CHECK-LABEL: name: test_uitofp_s64
+    ; CHECK: liveins: $r0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]]
+    ; CHECK-NEXT: [[VUITOD:%[0-9]+]]:dpr = nofpexcept VUITOD [[COPY1]], 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: $d0 = COPY [[VUITOD]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
     %0(s32) = COPY $r0
-    ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0
 
     %1(s64) = G_UITOFP %0(s32)
-    ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]]
-    ; CHECK: [[VREGR:%[0-9]+]]:dpr = VUITOD [[VREGF]], 14 /* CC::al */, $noreg
 
     $d0 = COPY %1(s64)
-    ; CHECK: $d0 = COPY [[VREGR]]
 
     BX_RET 14, $noreg, implicit $d0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
 ...
 ---
 name:            test_load_f32
-# CHECK-LABEL: name: test_load_f32
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: gprb }
   - { id: 1, class: fprb }
@@ -717,25 +738,26 @@ body:             |
   bb.0:
     liveins: $r0
 
+    ; CHECK-LABEL: name: test_load_f32
+    ; CHECK: liveins: $r0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+    ; CHECK-NEXT: [[VLDRS:%[0-9]+]]:spr = VLDRS [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s32))
+    ; CHECK-NEXT: $s0 = COPY [[VLDRS]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
     %0(p0) = COPY $r0
-    ; CHECK: %[[P:[0-9]+]]:gpr = COPY $r0
 
     %1(s32) = G_LOAD %0(p0) :: (load (s32))
-    ; CHECK: %[[V:[0-9]+]]:spr = VLDRS %[[P]], 0, 14 /* CC::al */, $noreg
 
     $s0 = COPY %1
-    ; CHECK: $s0 = COPY %[[V]]
 
     BX_RET 14, $noreg, implicit $s0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
 ...
 ---
 name:            test_load_f64
-# CHECK-LABEL: name: test_load_f64
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: gprb }
   - { id: 1, class: fprb }
@@ -743,45 +765,50 @@ body:             |
   bb.0:
     liveins: $r0
 
+    ; CHECK-LABEL: name: test_load_f64
+    ; CHECK: liveins: $r0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+    ; CHECK-NEXT: [[VLDRD:%[0-9]+]]:dpr = VLDRD [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s64))
+    ; CHECK-NEXT: $d0 = COPY [[VLDRD]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
     %0(p0) = COPY $r0
-    ; CHECK: %[[P:[0-9]+]]:gpr = COPY $r0
 
     %1(s64) = G_LOAD %0(p0) :: (load (s64))
-    ; CHECK: %[[V:[0-9]+]]:dpr = VLDRD %[[P]], 0, 14 /* CC::al */, $noreg
 
     $d0 = COPY %1
-    ; CHECK: $d0 = COPY %[[V]]
 
     BX_RET 14, $noreg, implicit $d0
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
 ...
 ---
 name:            test_stores
-# CHECK-LABEL: name: test_stores
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: gprb }
   - { id: 1, class: fprb }
   - { id: 2, class: fprb }
-# CHECK: id: [[P:[0-9]+]], class: gpr
-# CHECK: id: [[F32:[0-9]+]], class: spr
-# CHECK: id: [[F64:[0-9]+]], class: dpr
 body:             |
   bb.0:
     liveins: $r0, $s0, $d0
 
+    ; CHECK-LABEL: name: test_stores
+    ; CHECK: liveins: $r0, $s0, $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:dpr = COPY $d2
+    ; CHECK-NEXT: VSTRS [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s32))
+    ; CHECK-NEXT: VSTRD [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s64))
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg
     %0(p0) = COPY $r0
     %1(s32) = COPY $s0
     %2(s64) = COPY $d2
 
     G_STORE %1(s32), %0(p0) :: (store (s32))
-    ; CHECK: VSTRS %[[F32]], %[[P]], 0, 14 /* CC::al */, $noreg
 
     G_STORE %2(s64), %0(p0) :: (store (s64))
-    ; CHECK: VSTRD %[[F64]], %[[P]], 0, 14 /* CC::al */, $noreg
 
     BX_RET 14, $noreg
 ...
@@ -833,11 +860,9 @@ body:             |
 ...
 ---
 name:            test_soft_fp_double
-# CHECK-LABEL: name: test_soft_fp_double
 legalized:       true
 regBankSelected: true
 selected:        false
-# CHECK: selected: true
 registers:
   - { id: 0, class: gprb }
   - { id: 1, class: gprb }
@@ -848,24 +873,27 @@ body:             |
   bb.0:
     liveins: $r0, $r1, $r2, $r3
 
+    ; CHECK-LABEL: name: test_soft_fp_double
+    ; CHECK: liveins: $r0, $r1, $r2, $r3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r2
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r3
+    ; CHECK-NEXT: [[VMOVDRR:%[0-9]+]]:dpr = VMOVDRR [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: [[VMOVRRD:%[0-9]+]]:gpr, [[VMOVRRD1:%[0-9]+]]:gpr = VMOVRRD [[VMOVDRR]], 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: $r0 = COPY [[VMOVRRD]]
+    ; CHECK-NEXT: $r1 = COPY [[VMOVRRD1]]
+    ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0, implicit $r1
     %0(s32) = COPY $r2
-    ; CHECK: [[IN1:%[0-9]+]]:gpr = COPY $r2
 
     %1(s32) = COPY $r3
-    ; CHECK: [[IN2:%[0-9]+]]:gpr = COPY $r3
 
     %2(s64) = G_MERGE_VALUES %0(s32), %1(s32)
-    ; CHECK: %[[DREG:[0-9]+]]:dpr = VMOVDRR [[IN1]], [[IN2]]
 
     %3(s32), %4(s32) = G_UNMERGE_VALUES %2(s64)
-    ; CHECK: [[OUT1:%[0-9]+]]:gpr, [[OUT2:%[0-9]+]]:gpr = VMOVRRD %[[DREG]]
 
     $r0 = COPY %3
-    ; CHECK: $r0 = COPY [[OUT1]]
 
     $r1 = COPY %4
-    ; CHECK: $r1 = COPY [[OUT2]]
 
     BX_RET 14, $noreg, implicit $r0, implicit $r1
-    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0, implicit $r1
 ...
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir b/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir
index a6fc4da..fa982d8 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir
@@ -31,7 +31,7 @@ body:             |
     ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
     ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
     ; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
-    ; CHECK: [[VFNMSD:%[0-9]+]]:dpr = VFNMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg
+    ; CHECK: [[VFNMSD:%[0-9]+]]:dpr = nofpexcept VFNMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr
     ; CHECK: $d0 = COPY [[VFNMSD]]
     ; CHECK: MOVPCLR 14 /* CC::al */, $noreg, implicit $d0
     %0:fprb(s64) = COPY $d0
diff --git a/llvm/test/CodeGen/ARM/bf16_fast_math.ll b/llvm/test/CodeGen/ARM/bf16_fast_math.ll
index 1b18ea6..5f7e1e6 100644
--- a/llvm/test/CodeGen/ARM/bf16_fast_math.ll
+++ b/llvm/test/CodeGen/ARM/bf16_fast_math.ll
@@ -17,7 +17,7 @@ define bfloat @normal_fadd(bfloat %x, bfloat %y) {
   ; CHECK-NOBF16-NEXT:   [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
   ; CHECK-NOBF16-NEXT:   [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY1]], 130, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NOBF16-NEXT:   [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
-  ; CHECK-NOBF16-NEXT:   [[VADDS:%[0-9]+]]:spr = VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+  ; CHECK-NOBF16-NEXT:   [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-NOBF16-NEXT:   [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
   ; CHECK-NOBF16-NEXT:   ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
   ; CHECK-NOBF16-NEXT:   $r0 = COPY [[VMOVRS]]
@@ -44,7 +44,7 @@ define bfloat @fast_fadd(bfloat %x, bfloat %y) {
   ; CHECK-NOBF16-NEXT:   [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
   ; CHECK-NOBF16-NEXT:   [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY1]], 130, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NOBF16-NEXT:   [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
-  ; CHECK-NOBF16-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+  ; CHECK-NOBF16-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-NOBF16-NEXT:   [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
   ; CHECK-NOBF16-NEXT:   ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
   ; CHECK-NOBF16-NEXT:   $r0 = COPY [[VMOVRS]]
@@ -71,7 +71,7 @@ define bfloat @ninf_fadd(bfloat %x, bfloat %y) {
   ; CHECK-NOBF16-NEXT:   [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
   ; CHECK-NOBF16-NEXT:   [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY1]], 130, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NOBF16-NEXT:   [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
-  ; CHECK-NOBF16-NEXT:   [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+  ; CHECK-NOBF16-NEXT:   [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-NOBF16-NEXT:   [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
   ; CHECK-NOBF16-NEXT:   ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
   ; CHECK-NOBF16-NEXT:   $r0 = COPY [[VMOVRS]]
@@ -102,7 +102,7 @@ define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
   ; CHECK-NOBF16-NEXT:   [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
   ; CHECK-NOBF16-NEXT:   [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY2]], 130, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NOBF16-NEXT:   [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
-  ; CHECK-NOBF16-NEXT:   [[VADDS:%[0-9]+]]:spr = VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+  ; CHECK-NOBF16-NEXT:   [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-NOBF16-NEXT:   [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
   ; CHECK-NOBF16-NEXT:   ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
   ; CHECK-NOBF16-NEXT:   $r0 = COPY [[VMOVRS]]
@@ -113,7 +113,7 @@ define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
   ; CHECK-NOBF16-NEXT:   [[VMOVSR2:%[0-9]+]]:spr = VMOVSR killed [[MOVsi2]], 14 /* CC::al */, $noreg
   ; CHECK-NOBF16-NEXT:   [[MOVsi3:%[0-9]+]]:gpr = MOVsi [[COPY3]], 130, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NOBF16-NEXT:   [[VMOVSR3:%[0-9]+]]:spr = VMOVSR killed [[MOVsi3]], 14 /* CC::al */, $noreg
-  ; CHECK-NOBF16-NEXT:   [[VADDS1:%[0-9]+]]:spr = VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg
+  ; CHECK-NOBF16-NEXT:   [[VADDS1:%[0-9]+]]:spr = nofpexcept VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-NOBF16-NEXT:   [[VMOVRS1:%[0-9]+]]:gpr = VMOVRS killed [[VADDS1]], 14 /* CC::al */, $noreg
   ; CHECK-NOBF16-NEXT:   ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
   ; CHECK-NOBF16-NEXT:   $r0 = COPY [[VMOVRS1]]
@@ -142,10 +142,10 @@ define bfloat @nnan_ninf_contract_fadd_sequence(bfloat %x, bfloat %y, bfloat %z)
   ; CHECK-NOBF16-NEXT:   [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
   ; CHECK-NOBF16-NEXT:   [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY2]], 130, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NOBF16-NEXT:   [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
-  ; CHECK-NOBF16-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+  ; CHECK-NOBF16-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-NOBF16-NEXT:   [[MOVsi2:%[0-9]+]]:gpr = MOVsi [[COPY]], 130, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NOBF16-NEXT:   [[VMOVSR2:%[0-9]+]]:spr = VMOVSR killed [[MOVsi2]], 14 /* CC::al */, $noreg
-  ; CHECK-NOBF16-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VADDS]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg
+  ; CHECK-NOBF16-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VADDS]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-NOBF16-NEXT:   [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS1]], 14 /* CC::al */, $noreg
   ; CHECK-NOBF16-NEXT:   ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
   ; CHECK-NOBF16-NEXT:   $r0 = COPY [[VMOVRS]]
@@ -174,7 +174,7 @@ define bfloat @ninf_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
   ; CHECK-NOBF16-NEXT:   [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
   ; CHECK-NOBF16-NEXT:   [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY2]], 130, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NOBF16-NEXT:   [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
-  ; CHECK-NOBF16-NEXT:   [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+  ; CHECK-NOBF16-NEXT:   [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-NOBF16-NEXT:   [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
   ; CHECK-NOBF16-NEXT:   ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
   ; CHECK-NOBF16-NEXT:   $r0 = COPY [[VMOVRS]]
@@ -185,7 +185,7 @@ define bfloat @ninf_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
   ; CHECK-NOBF16-NEXT:   [[VMOVSR2:%[0-9]+]]:spr = VMOVSR killed [[MOVsi2]], 14 /* CC::al */, $noreg
   ; CHECK-NOBF16-NEXT:   [[MOVsi3:%[0-9]+]]:gpr = MOVsi [[COPY3]], 130, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NOBF16-NEXT:   [[VMOVSR3:%[0-9]+]]:spr = VMOVSR killed [[MOVsi3]], 14 /* CC::al */, $noreg
-  ; CHECK-NOBF16-NEXT:   [[VADDS1:%[0-9]+]]:spr = ninf VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg
+  ; CHECK-NOBF16-NEXT:   [[VADDS1:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-NOBF16-NEXT:   [[VMOVRS1:%[0-9]+]]:gpr = VMOVRS killed [[VADDS1]], 14 /* CC::al */, $noreg
   ; CHECK-NOBF16-NEXT:   ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
   ; CHECK-NOBF16-NEXT:   $r0 = COPY [[VMOVRS1]]
diff --git a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
index 1bee32f..fe23e85 100644
--- a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
+++ b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
@@ -22,15 +22,16 @@ body:             |
 
     ; CHECK-LABEL: name: test_groups
     ; CHECK: liveins: $d0, $r0, $r1, $r2, $r3, $r4
-    ; CHECK: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg
-    ; CHECK: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
-    ; CHECK: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
-    ; CHECK: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
-    ; CHECK: t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
-    ; CHECK: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg
-    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit killed $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
+    ; CHECK-NEXT: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    ; CHECK-NEXT: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg
+    ; CHECK-NEXT: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
+    ; CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit killed $d0
     renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
-    renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg
+    renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
     renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
     t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
diff --git a/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir
index 8e671c9..f5b2e98 100644
--- a/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir
+++ b/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir
@@ -81,7 +81,7 @@ body:             |
     STRi12 killed renamable $r1, killed renamable $r0, 0, 14, $noreg :: (volatile store (s32) into %ir.LL, align 8)
     dead renamable $r0 = SPACE 8920, undef renamable $r0
     renamable $s2 = VLDRH $sp, 1, 14, $noreg :: (volatile dereferenceable load (s16) from %ir.S)
-    renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg
+    renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg, implicit $fpscr_rm
     VSTRH renamable $s0, $sp, 1, 14, $noreg :: (volatile store (s16) into %ir.S)
     renamable $r0 = VMOVRH killed renamable $s0, 14, $noreg
     dead renamable $r1 = SPACE 1350, undef renamable $r0
diff --git a/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir b/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir
index 03ddd80..4b66476 100644
--- a/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir
+++ b/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir
@@ -72,7 +72,7 @@ body:             |
     renamable $s2 = VLDRH $sp, 1, 14, $noreg :: (volatile dereferenceable load (s16) from %ir.S)
     renamable $s0 = VLDRH %const.1, 0, 14, $noreg :: (load (s16) from constant-pool)
     dead renamable $r0 = SPACE 1230, undef renamable $r0
-    renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg
+    renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg, implicit $fpscr_rm
     VSTRH renamable $s0, $sp, 1, 14, $noreg :: (volatile store (s16) into %ir.S)
     renamable $r0 = VMOVRH killed renamable $s0, 14, $noreg
     dead renamable $r1 = SPACE 1330, undef renamable $r0
diff --git a/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir
index 46f028b..c16a62a 100644
--- a/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir
+++ b/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir
@@ -89,7 +89,7 @@ body:             |
     $sp = frame-setup SUBri $sp, 4, 14, $noreg, $noreg
     frame-setup CFI_INSTRUCTION def_cfa_offset 4
     renamable $s0 = VLDRH %const.0, 0, 14, $noreg :: (load (s16) from constant-pool)
-    VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv
+    VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm
     VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (store (s16) into %ir.res)
     FMSTAT 14, $noreg, implicit-def $cpsr, implicit killed $fpscr_nzcv
     Bcc %bb.2, 0, killed $cpsr
diff --git a/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir
index 5a03fcd..049b7d9 100644
--- a/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir
+++ b/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir
@@ -95,7 +95,7 @@ body:             |
     $sp = frame-setup SUBri $sp, 4, 14, $noreg, $noreg
     frame-setup CFI_INSTRUCTION def_cfa_offset 4
     renamable $s0 = VLDRH %const.0, 0, 14, $noreg :: (load (s16) from constant-pool)
-    VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv
+    VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm
     VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (store (s16) into %ir.res)
     FMSTAT 14, $noreg, implicit-def $cpsr, implicit killed $fpscr_nzcv
     Bcc %bb.2, 0, killed $cpsr
diff --git a/llvm/test/CodeGen/ARM/fp16_fast_math.ll b/llvm/test/CodeGen/ARM/fp16_fast_math.ll
index 165eb4b..47e1f84f 100644
--- a/llvm/test/CodeGen/ARM/fp16_fast_math.ll
+++ b/llvm/test/CodeGen/ARM/fp16_fast_math.ll
@@ -16,11 +16,11 @@ define half @normal_fadd(half %x, half %y) {
   ; CHECK-CVT-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $r0
   ; CHECK-CVT-NEXT:   [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]]
   ; CHECK-CVT-NEXT:   [[COPY3:%[0-9]+]]:spr = COPY [[COPY]]
-  ; CHECK-CVT-NEXT:   [[VCVTBHS:%[0-9]+]]:spr = VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
-  ; CHECK-CVT-NEXT:   [[VCVTBHS1:%[0-9]+]]:spr = VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg
-  ; CHECK-CVT-NEXT:   [[VADDS:%[0-9]+]]:spr = VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBHS:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+  ; CHECK-CVT-NEXT:   [[VCVTBHS1:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg, implicit $fpscr
+  ; CHECK-CVT-NEXT:   [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
-  ; CHECK-CVT-NEXT:   [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[COPY4:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
   ; CHECK-CVT-NEXT:   $r0 = COPY [[COPY4]]
   ; CHECK-CVT-NEXT:   MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -33,7 +33,7 @@ define half @normal_fadd(half %x, half %y) {
   ; CHECK-FP16-NEXT:   [[COPY1:%[0-9]+]]:rgpr = COPY $r0
   ; CHECK-FP16-NEXT:   [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
   ; CHECK-FP16-NEXT:   [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
-  ; CHECK-FP16-NEXT:   [[VADDH:%[0-9]+]]:hpr = VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+  ; CHECK-FP16-NEXT:   [[VADDH:%[0-9]+]]:hpr = nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
   ; CHECK-FP16-NEXT:   $r0 = COPY [[VADDH]]
   ; CHECK-FP16-NEXT:   MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
 entry:
@@ -50,11 +50,11 @@ define half @fast_fadd(half %x, half %y) {
   ; CHECK-CVT-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $r0
   ; CHECK-CVT-NEXT:   [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]]
   ; CHECK-CVT-NEXT:   [[COPY3:%[0-9]+]]:spr = COPY [[COPY]]
-  ; CHECK-CVT-NEXT:   [[VCVTBHS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
-  ; CHECK-CVT-NEXT:   [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg
-  ; CHECK-CVT-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBHS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+  ; CHECK-CVT-NEXT:   [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg, implicit $fpscr
+  ; CHECK-CVT-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
-  ; CHECK-CVT-NEXT:   [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[COPY4:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
   ; CHECK-CVT-NEXT:   $r0 = COPY [[COPY4]]
   ; CHECK-CVT-NEXT:   MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -67,7 +67,7 @@ define half @fast_fadd(half %x, half %y) {
   ; CHECK-FP16-NEXT:   [[COPY1:%[0-9]+]]:rgpr = COPY $r0
   ; CHECK-FP16-NEXT:   [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
   ; CHECK-FP16-NEXT:   [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
-  ; CHECK-FP16-NEXT:   [[VADDH:%[0-9]+]]:hpr = nnan ninf nsz arcp contract afn reassoc VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+  ; CHECK-FP16-NEXT:   [[VADDH:%[0-9]+]]:hpr = nnan ninf nsz arcp contract afn reassoc nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
   ; CHECK-FP16-NEXT:   $r0 = COPY [[VADDH]]
   ; CHECK-FP16-NEXT:   MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
 entry:
@@ -84,11 +84,11 @@ define half @ninf_fadd(half %x, half %y) {
   ; CHECK-CVT-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $r0
   ; CHECK-CVT-NEXT:   [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]]
   ; CHECK-CVT-NEXT:   [[COPY3:%[0-9]+]]:spr = COPY [[COPY]]
-  ; CHECK-CVT-NEXT:   [[VCVTBHS:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
-  ; CHECK-CVT-NEXT:   [[VCVTBHS1:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg
-  ; CHECK-CVT-NEXT:   [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBHS:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+  ; CHECK-CVT-NEXT:   [[VCVTBHS1:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg, implicit $fpscr
+  ; CHECK-CVT-NEXT:   [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
-  ; CHECK-CVT-NEXT:   [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[COPY4:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
   ; CHECK-CVT-NEXT:   $r0 = COPY [[COPY4]]
   ; CHECK-CVT-NEXT:   MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -101,7 +101,7 @@ define half @ninf_fadd(half %x, half %y) {
   ; CHECK-FP16-NEXT:   [[COPY1:%[0-9]+]]:rgpr = COPY $r0
   ; CHECK-FP16-NEXT:   [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
   ; CHECK-FP16-NEXT:   [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
-  ; CHECK-FP16-NEXT:   [[VADDH:%[0-9]+]]:hpr = ninf VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+  ; CHECK-FP16-NEXT:   [[VADDH:%[0-9]+]]:hpr = ninf nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
   ; CHECK-FP16-NEXT:   $r0 = COPY [[VADDH]]
   ; CHECK-FP16-NEXT:   MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
 entry:
@@ -122,19 +122,19 @@ define half @normal_fadd_sequence(half %x, half %y, half %z) {
   ; CHECK-CVT-NEXT:   [[COPY2:%[0-9]+]]:gpr = COPY $r0
   ; CHECK-CVT-NEXT:   [[COPY3:%[0-9]+]]:spr = COPY [[COPY2]]
   ; CHECK-CVT-NEXT:   [[COPY4:%[0-9]+]]:spr = COPY [[COPY1]]
-  ; CHECK-CVT-NEXT:   [[VCVTBHS:%[0-9]+]]:spr = VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg
-  ; CHECK-CVT-NEXT:   [[VCVTBHS1:%[0-9]+]]:spr = VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
-  ; CHECK-CVT-NEXT:   [[VADDS:%[0-9]+]]:spr = VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBHS:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg, implicit $fpscr
+  ; CHECK-CVT-NEXT:   [[VCVTBHS1:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+  ; CHECK-CVT-NEXT:   [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[COPY5:%[0-9]+]]:spr = COPY [[COPY]]
-  ; CHECK-CVT-NEXT:   [[VCVTBHS2:%[0-9]+]]:spr = VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBHS2:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
-  ; CHECK-CVT-NEXT:   [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[COPY6:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
   ; CHECK-CVT-NEXT:   [[COPY7:%[0-9]+]]:spr = COPY killed [[COPY6]]
-  ; CHECK-CVT-NEXT:   [[VCVTBHS3:%[0-9]+]]:spr = VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg
-  ; CHECK-CVT-NEXT:   [[VADDS1:%[0-9]+]]:spr = VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBHS3:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg, implicit $fpscr
+  ; CHECK-CVT-NEXT:   [[VADDS1:%[0-9]+]]:spr = nofpexcept VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[DEF1:%[0-9]+]]:spr = IMPLICIT_DEF
-  ; CHECK-CVT-NEXT:   [[VCVTBSH1:%[0-9]+]]:spr = VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBSH1:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[COPY8:%[0-9]+]]:gpr = COPY killed [[VCVTBSH1]]
   ; CHECK-CVT-NEXT:   $r0 = COPY [[COPY8]]
   ; CHECK-CVT-NEXT:   MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -148,9 +148,9 @@ define half @normal_fadd_sequence(half %x, half %y, half %z) {
   ; CHECK-FP16-NEXT:   [[COPY2:%[0-9]+]]:rgpr = COPY $r0
   ; CHECK-FP16-NEXT:   [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
   ; CHECK-FP16-NEXT:   [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY2]], 14, $noreg
-  ; CHECK-FP16-NEXT:   [[VADDH:%[0-9]+]]:hpr = VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+  ; CHECK-FP16-NEXT:   [[VADDH:%[0-9]+]]:hpr = nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
   ; CHECK-FP16-NEXT:   [[VMOVHR2:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
-  ; CHECK-FP16-NEXT:   [[VADDH1:%[0-9]+]]:hpr = VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg
+  ; CHECK-FP16-NEXT:   [[VADDH1:%[0-9]+]]:hpr = nofpexcept VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg, implicit $fpscr
   ; CHECK-FP16-NEXT:   $r0 = COPY [[VADDH1]]
   ; CHECK-FP16-NEXT:   MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
 entry:
@@ -169,14 +169,14 @@ define half @nnan_ninf_contract_fadd_sequence(half %x, half %y, half %z) {
   ; CHECK-CVT-NEXT:   [[COPY2:%[0-9]+]]:gpr = COPY $r0
   ; CHECK-CVT-NEXT:   [[COPY3:%[0-9]+]]:spr = COPY [[COPY2]]
   ; CHECK-CVT-NEXT:   [[COPY4:%[0-9]+]]:spr = COPY [[COPY1]]
-  ; CHECK-CVT-NEXT:   [[VCVTBHS:%[0-9]+]]:spr = nnan ninf contract VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg
-  ; CHECK-CVT-NEXT:   [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf contract VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
-  ; CHECK-CVT-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBHS:%[0-9]+]]:spr = nnan ninf contract nofpexcept VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg, implicit $fpscr
+  ; CHECK-CVT-NEXT:   [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf contract nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+  ; CHECK-CVT-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[COPY5:%[0-9]+]]:spr = COPY [[COPY]]
-  ; CHECK-CVT-NEXT:   [[VCVTBHS2:%[0-9]+]]:spr = nnan ninf contract VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg
-  ; CHECK-CVT-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VADDS]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBHS2:%[0-9]+]]:spr = nnan ninf contract nofpexcept VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg, implicit $fpscr
+  ; CHECK-CVT-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VADDS]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
-  ; CHECK-CVT-NEXT:   [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS1]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS1]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[COPY6:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
   ; CHECK-CVT-NEXT:   $r0 = COPY [[COPY6]]
   ; CHECK-CVT-NEXT:   MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -190,9 +190,9 @@ define half @nnan_ninf_contract_fadd_sequence(half %x, half %y, half %z) {
   ; CHECK-FP16-NEXT:   [[COPY2:%[0-9]+]]:rgpr = COPY $r0
   ; CHECK-FP16-NEXT:   [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
   ; CHECK-FP16-NEXT:   [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY2]], 14, $noreg
-  ; CHECK-FP16-NEXT:   [[VADDH:%[0-9]+]]:hpr = nnan ninf contract VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+  ; CHECK-FP16-NEXT:   [[VADDH:%[0-9]+]]:hpr = nnan ninf contract nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
   ; CHECK-FP16-NEXT:   [[VMOVHR2:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
-  ; CHECK-FP16-NEXT:   [[VADDH1:%[0-9]+]]:hpr = nnan ninf contract VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg
+  ; CHECK-FP16-NEXT:   [[VADDH1:%[0-9]+]]:hpr = nnan ninf contract nofpexcept VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg, implicit $fpscr
   ; CHECK-FP16-NEXT:   $r0 = COPY [[VADDH1]]
   ; CHECK-FP16-NEXT:   MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
 entry:
@@ -211,19 +211,19 @@ define half @ninf_fadd_sequence(half %x, half %y, half %z) {
   ; CHECK-CVT-NEXT:   [[COPY2:%[0-9]+]]:gpr = COPY $r0
   ; CHECK-CVT-NEXT:   [[COPY3:%[0-9]+]]:spr = COPY [[COPY2]]
   ; CHECK-CVT-NEXT:   [[COPY4:%[0-9]+]]:spr = COPY [[COPY1]]
-  ; CHECK-CVT-NEXT:   [[VCVTBHS:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg
-  ; CHECK-CVT-NEXT:   [[VCVTBHS1:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
-  ; CHECK-CVT-NEXT:   [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBHS:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg, implicit $fpscr
+  ; CHECK-CVT-NEXT:   [[VCVTBHS1:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+  ; CHECK-CVT-NEXT:   [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[COPY5:%[0-9]+]]:spr = COPY [[COPY]]
-  ; CHECK-CVT-NEXT:   [[VCVTBHS2:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBHS2:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
-  ; CHECK-CVT-NEXT:   [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[COPY6:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
   ; CHECK-CVT-NEXT:   [[COPY7:%[0-9]+]]:spr = COPY killed [[COPY6]]
-  ; CHECK-CVT-NEXT:   [[VCVTBHS3:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg
-  ; CHECK-CVT-NEXT:   [[VADDS1:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBHS3:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg, implicit $fpscr
+  ; CHECK-CVT-NEXT:   [[VADDS1:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[DEF1:%[0-9]+]]:spr = IMPLICIT_DEF
-  ; CHECK-CVT-NEXT:   [[VCVTBSH1:%[0-9]+]]:spr = VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg
+  ; CHECK-CVT-NEXT:   [[VCVTBSH1:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg, implicit $fpscr
   ; CHECK-CVT-NEXT:   [[COPY8:%[0-9]+]]:gpr = COPY killed [[VCVTBSH1]]
   ; CHECK-CVT-NEXT:   $r0 = COPY [[COPY8]]
   ; CHECK-CVT-NEXT:   MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -237,9 +237,9 @@ define half @ninf_fadd_sequence(half %x, half %y, half %z) {
   ; CHECK-FP16-NEXT:   [[COPY2:%[0-9]+]]:rgpr = COPY $r0
   ; CHECK-FP16-NEXT:   [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
   ; CHECK-FP16-NEXT:   [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY2]], 14, $noreg
-  ; CHECK-FP16-NEXT:   [[VADDH:%[0-9]+]]:hpr = ninf VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+  ; CHECK-FP16-NEXT:   [[VADDH:%[0-9]+]]:hpr = ninf nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
   ; CHECK-FP16-NEXT:   [[VMOVHR2:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
-  ; CHECK-FP16-NEXT:   [[VADDH1:%[0-9]+]]:hpr = ninf VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg
+  ; CHECK-FP16-NEXT:   [[VADDH1:%[0-9]+]]:hpr = ninf nofpexcept VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg, implicit $fpscr
   ; CHECK-FP16-NEXT:   $r0 = COPY [[VADDH1]]
   ; CHECK-FP16-NEXT:   MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
 entry:
diff --git a/llvm/test/CodeGen/ARM/ipra-reg-usage.ll b/llvm/test/CodeGen/ARM/ipra-reg-usage.ll
index c928390..90142cb 100644
--- a/llvm/test/CodeGen/ARM/ipra-reg-usage.ll
+++ b/llvm/test/CodeGen/ARM/ipra-reg-usage.ll
@@ -6,7 +6,7 @@ target triple = "armv7-eabi"
 
 declare void @bar1()
 define void @foo()#0 {
-; CHECK: foo Clobbered Registers: $apsr $apsr_nzcv $cpsr $fpcxtns $fpcxts $fpexc $fpinst $fpscr $fpscr_nzcv $fpscr_nzcvqc $fpsid $itstate $pc $ra_auth_code $sp $spsr $vpr $zr $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $fpinst2 $mvfr0 $mvfr1 $mvfr2 $p0 $q0 $q1 $q2 $q3 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $r0 $r1 $r2 $r3 $r12 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s8 $s9 $s10 $s11 $s12 $s13 $s14 $s15 $d0_d2 $d1_d3 $d2_d4 $d3_d5 $d4_d6 $d5_d7 $d6_d8 $d7_d9 $d14_d16 $d15_d17 $d16_d18 $d17_d19 $d18_d20 $d19_d21 $d20_d22 $d21_d23 $d22_d24 $d23_d25 $d24_d26 $d25_d27 $d26_d28 $d27_d29 $d28_d30 $d29_d31 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $r0_r1 $r2_r3 $r12_sp $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d0_d2_d4 $d1_d3_d5 $d2_d4_d6 $d3_d5_d7 $d4_d6_d8 $d5_d7_d9 $d6_d8_d10 $d7_d9_d11 $d12_d14_d16 $d13_d15_d17 $d14_d16_d18 $d15_d17_d19 $d16_d18_d20 $d17_d19_d21 $d18_d20_d22 $d19_d21_d23 $d20_d22_d24 $d21_d23_d25 $d22_d24_d26 $d23_d25_d27 $d24_d26_d28 $d25_d27_d29 $d26_d28_d30 $d27_d29_d31 $d0_d2_d4_d6 $d1_d3_d5_d7 $d2_d4_d6_d8 $d3_d5_d7_d9 $d4_d6_d8_d10 $d5_d7_d9_d11 $d6_d8_d10_d12 $d7_d9_d11_d13 $d10_d12_d14_d16 $d11_d13_d15_d17 $d12_d14_d16_d18 $d13_d15_d17_d19 $d14_d16_d18_d20 $d15_d17_d19_d21 $d16_d18_d20_d22 $d17_d19_d21_d23 $d18_d20_d22_d24 $d19_d21_d23_d25 $d20_d22_d24_d26 $d21_d23_d25_d27 $d22_d24_d26_d28 $d23_d25_d27_d29 $d24_d26_d28_d30 $d25_d27_d29_d31 $d1_d2 $d3_d4 $d5_d6 $d7_d8 $d15_d16 $d17_d18 $d19_d20 $d21_d22 $d23_d24 $d25_d26 $d27_d28 $d29_d30 $d1_d2_d3_d4 $d3_d4_d5_d6 $d5_d6_d7_d8 $d7_d8_d9_d10 $d13_d14_d15_d16 $d15_d16_d17_d18 $d17_d18_d19_d20 $d19_d20_d21_d22 $d21_d22_d23_d24 $d23_d24_d25_d26 $d25_d26_d27_d28 $d27_d28_d29_d30
+; CHECK: foo Clobbered Registers: $apsr $apsr_nzcv $cpsr $fpcxtns $fpcxts $fpexc $fpinst $fpscr $fpscr_nzcv $fpscr_nzcvqc $fpscr_rm $fpsid $itstate $pc $ra_auth_code $sp $spsr $vpr $zr $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $fpinst2 $mvfr0 $mvfr1 $mvfr2 $p0 $q0 $q1 $q2 $q3 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $r0 $r1 $r2 $r3 $r12 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s8 $s9 $s10 $s11 $s12 $s13 $s14 $s15 $d0_d2 $d1_d3 $d2_d4 $d3_d5 $d4_d6 $d5_d7 $d6_d8 $d7_d9 $d14_d16 $d15_d17 $d16_d18 $d17_d19 $d18_d20 $d19_d21 $d20_d22 $d21_d23 $d22_d24 $d23_d25 $d24_d26 $d25_d27 $d26_d28 $d27_d29 $d28_d30 $d29_d31 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $r0_r1 $r2_r3 $r12_sp $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d0_d2_d4 $d1_d3_d5 $d2_d4_d6 $d3_d5_d7 $d4_d6_d8 $d5_d7_d9 $d6_d8_d10 $d7_d9_d11 $d12_d14_d16 $d13_d15_d17 $d14_d16_d18 $d15_d17_d19 $d16_d18_d20 $d17_d19_d21 $d18_d20_d22 $d19_d21_d23 $d20_d22_d24 $d21_d23_d25 $d22_d24_d26 $d23_d25_d27 $d24_d26_d28 $d25_d27_d29 $d26_d28_d30 $d27_d29_d31 $d0_d2_d4_d6 $d1_d3_d5_d7 $d2_d4_d6_d8 $d3_d5_d7_d9 $d4_d6_d8_d10 $d5_d7_d9_d11 $d6_d8_d10_d12 $d7_d9_d11_d13 $d10_d12_d14_d16 $d11_d13_d15_d17 $d12_d14_d16_d18 $d13_d15_d17_d19 $d14_d16_d18_d20 $d15_d17_d19_d21 $d16_d18_d20_d22 $d17_d19_d21_d23 $d18_d20_d22_d24 $d19_d21_d23_d25 $d20_d22_d24_d26 $d21_d23_d25_d27 $d22_d24_d26_d28 $d23_d25_d27_d29 $d24_d26_d28_d30 $d25_d27_d29_d31 $d1_d2 $d3_d4 $d5_d6 $d7_d8 $d15_d16 $d17_d18 $d19_d20 $d21_d22 $d23_d24 $d25_d26 $d27_d28 $d29_d30 $d1_d2_d3_d4 $d3_d4_d5_d6 $d5_d6_d7_d8 $d7_d8_d9_d10 $d13_d14_d15_d16 $d15_d16_d17_d18 $d17_d18_d19_d20 $d19_d20_d21_d22 $d21_d22_d23_d24 $d23_d24_d25_d26 $d25_d26_d27_d28 $d27_d28_d29_d30
   call void @bar1()
   call void @bar2()
   ret void
diff --git a/llvm/test/CodeGen/ARM/llrint-conv.ll b/llvm/test/CodeGen/ARM/llrint-conv.ll
index a1a04db..7274a8b 100644
--- a/llvm/test/CodeGen/ARM/llrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/llrint-conv.ll
@@ -1,7 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
 ; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
-; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
 
 define i64 @testmsxh_builtin(half %x) {
 ; CHECK-SOFT-LABEL: testmsxh_builtin:
@@ -22,6 +23,14 @@ define i64 @testmsxh_builtin(half %x) {
 ; CHECK-NOFP16-NEXT:    bl llrintf
 ; CHECK-NOFP16-NEXT:    pop {r11, pc}
 ;
+; CHECK-FPv8-LABEL: testmsxh_builtin:
+; CHECK-FPv8:       @ %bb.0: @ %entry
+; CHECK-FPv8-NEXT:    .save {r11, lr}
+; CHECK-FPv8-NEXT:    push {r11, lr}
+; CHECK-FPv8-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-FPv8-NEXT:    bl llrintf
+; CHECK-FPv8-NEXT:    pop {r11, pc}
+;
 ; CHECK-FP16-LABEL: testmsxh_builtin:
 ; CHECK-FP16:       @ %bb.0: @ %entry
 ; CHECK-FP16-NEXT:    .save {r11, lr}
diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll
index 23a2685..2de2349 100644
--- a/llvm/test/CodeGen/ARM/lrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/lrint-conv.ll
@@ -1,14 +1,43 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
 ; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
-; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
 
-; FIXME: crash
-; define i32 @testmswh_builtin(half %x) {
-; entry:
-;   %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
-;   ret i32 %0
-; }
+define i32 @testmswh_builtin(half %x) {
+; CHECK-SOFT-LABEL: testmswh_builtin:
+; CHECK-SOFT:       @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT:    .save {r11, lr}
+; CHECK-SOFT-NEXT:    push {r11, lr}
+; CHECK-SOFT-NEXT:    bl __aeabi_h2f
+; CHECK-SOFT-NEXT:    pop {r11, lr}
+; CHECK-SOFT-NEXT:    b lrintf
+;
+; CHECK-NOFP16-LABEL: testmswh_builtin:
+; CHECK-NOFP16:       @ %bb.0: @ %entry
+; CHECK-NOFP16-NEXT:    .save {r11, lr}
+; CHECK-NOFP16-NEXT:    push {r11, lr}
+; CHECK-NOFP16-NEXT:    vmov r0, s0
+; CHECK-NOFP16-NEXT:    bl __aeabi_h2f
+; CHECK-NOFP16-NEXT:    vmov s0, r0
+; CHECK-NOFP16-NEXT:    pop {r11, lr}
+; CHECK-NOFP16-NEXT:    b lrintf
+;
+; CHECK-FPv8-LABEL: testmswh_builtin:
+; CHECK-FPv8:       @ %bb.0: @ %entry
+; CHECK-FPv8-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-FPv8-NEXT:    b lrintf
+;
+; CHECK-FP16-LABEL: testmswh_builtin:
+; CHECK-FP16:       @ %bb.0: @ %entry
+; CHECK-FP16-NEXT:    vrintx.f16 s0, s0
+; CHECK-FP16-NEXT:    vcvt.s32.f16 s0, s0
+; CHECK-FP16-NEXT:    vmov r0, s0
+; CHECK-FP16-NEXT:    bx lr
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+  ret i32 %0
+}
 
 define i32 @testmsws_builtin(float %x) {
 ; CHECK-LABEL: testmsws_builtin:
@@ -39,8 +68,3 @@ entry:
   %0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x)
   ret i32 %0
 }
-
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-FP16: {{.*}}
-; CHECK-NOFP16: {{.*}}
-; CHECK-SOFT: {{.*}}
diff --git a/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir b/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir
index 46f3e4b..17d6619 100644
--- a/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir
+++ b/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir
@@ -14,7 +14,7 @@
 # CHECK: SU(1):   %1:dpr = VABSD %0:dpr, 14, $noreg
 # CHECK: SU(2):   %2:dpr = VLDRD %const.0, 0, 14, $noreg :: (load (s64) from constant-pool)
 # CHECK: SU(4):   %3:rgpr = t2MOVi 0, 14, $noreg, $noreg
-# CHECK: SU(3):   VCMPD %1:dpr, %2:dpr, 14, $noreg, implicit-def $fpscr_nzcv
+# CHECK: SU(3):   VCMPD %1:dpr, %2:dpr, 14, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm
 # CHECK: SU(5):   $r0 = COPY %3:rgpr
 ---
 name:            test
@@ -29,7 +29,7 @@ body:             |
     %0:dpr = COPY $d0
     %1:dpr = VABSD %0, 14 /* CC::al */, $noreg
     %2:dpr = VLDRD %const.0, 0, 14 /* CC::al */, $noreg :: (load (s64) from constant-pool)
-    VCMPD %1, %2, 14 /* CC::al */, $noreg, implicit-def $fpscr_nzcv
+    VCMPD %1, %2, 14 /* CC::al */, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm
     %4:rgpr = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
     $r0 = COPY %4
     tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0
diff --git a/llvm/test/CodeGen/ARM/vector-lrint.ll b/llvm/test/CodeGen/ARM/vector-lrint.ll
index c1159da..c3c8884 100644
--- a/llvm/test/CodeGen/ARM/vector-lrint.ll
+++ b/llvm/test/CodeGen/ARM/vector-lrint.ll
@@ -9,31 +9,1290 @@
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I32
 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I64
 
-; FIXME: crash "Do not know how to soft promote this operator's operand!"
-; define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
-;   %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
-;   ret <1 x iXLen> %a
-; }
+define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+; LE-I32-LABEL: lrint_v1f16:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    vmov r0, s0
+; LE-I32-NEXT:    bl __aeabi_f2h
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v1f16:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r11, lr}
+; LE-I64-NEXT:    push {r11, lr}
+; LE-I64-NEXT:    vmov r0, s0
+; LE-I64-NEXT:    bl __aeabi_f2h
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d0[0], r0
+; LE-I64-NEXT:    vmov.32 d0[1], r1
+; LE-I64-NEXT:    pop {r11, pc}
+;
+; BE-I32-LABEL: lrint_v1f16:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    vmov r0, s0
+; BE-I32-NEXT:    bl __aeabi_f2h
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v1f16:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r11, lr}
+; BE-I64-NEXT:    push {r11, lr}
+; BE-I64-NEXT:    vmov r0, s0
+; BE-I64-NEXT:    bl __aeabi_f2h
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEXT:    pop {r11, pc}
+  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
+  ret <1 x iXLen> %a
+}
 
-; define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
-;   %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
-;   ret <2 x iXLen> %a
-; }
+define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+; LE-I32-LABEL: lrint_v2f16:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    .vsave {d8}
+; LE-I32-NEXT:    vpush {d8}
+; LE-I32-NEXT:    vmov r0, s0
+; LE-I32-NEXT:    vmov.f32 s16, s1
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov r1, s16
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    mov r0, r1
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    vorr d0, d8, d8
+; LE-I32-NEXT:    vpop {d8}
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v2f16:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r11, lr}
+; LE-I64-NEXT:    push {r4, r5, r11, lr}
+; LE-I64-NEXT:    .vsave {d8, d9}
+; LE-I64-NEXT:    vpush {d8, d9}
+; LE-I64-NEXT:    vmov r0, s1
+; LE-I64-NEXT:    vmov.f32 s16, s0
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    mov r4, r0
+; LE-I64-NEXT:    vmov r0, s16
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    vmov.32 d9[0], r4
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    vmov.32 d9[1], r5
+; LE-I64-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEXT:    vorr q0, q4, q4
+; LE-I64-NEXT:    vpop {d8, d9}
+; LE-I64-NEXT:    pop {r4, r5, r11, pc}
+;
+; BE-I32-LABEL: lrint_v2f16:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    .vsave {d8}
+; BE-I32-NEXT:    vpush {d8}
+; BE-I32-NEXT:    vmov r0, s0
+; BE-I32-NEXT:    vmov.f32 s16, s1
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov r1, s16
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    mov r0, r1
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    vrev64.32 d0, d8
+; BE-I32-NEXT:    vpop {d8}
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v2f16:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r11, lr}
+; BE-I64-NEXT:    push {r4, r5, r11, lr}
+; BE-I64-NEXT:    .vsave {d8}
+; BE-I64-NEXT:    vpush {d8}
+; BE-I64-NEXT:    vmov r0, s1
+; BE-I64-NEXT:    vmov.f32 s16, s0
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    mov r4, r0
+; BE-I64-NEXT:    vmov r0, s16
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    vmov.32 d8[0], r4
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d8[1], r5
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d1, d8
+; BE-I64-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEXT:    vpop {d8}
+; BE-I64-NEXT:    pop {r4, r5, r11, pc}
+  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
+  ret <2 x iXLen> %a
+}
 
-; define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
-;   %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
-;   ret <4 x iXLen> %a
-; }
+define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
+; LE-I32-LABEL: lrint_v4f16:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r4, r5, r11, lr}
+; LE-I32-NEXT:    push {r4, r5, r11, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11}
+; LE-I32-NEXT:    vmov r0, s3
+; LE-I32-NEXT:    vmov.f32 s16, s2
+; LE-I32-NEXT:    vmov.f32 s18, s1
+; LE-I32-NEXT:    vmov.f32 s20, s0
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    vmov r0, s16
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r5, r0
+; LE-I32-NEXT:    vmov r0, s20
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r5
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    vmov r0, s18
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    vmov.32 d11[1], r4
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    vorr q0, q5, q5
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11}
+; LE-I32-NEXT:    pop {r4, r5, r11, pc}
+;
+; LE-I64-LABEL: lrint_v4f16:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEXT:    .vsave {d12, d13}
+; LE-I64-NEXT:    vpush {d12, d13}
+; LE-I64-NEXT:    .vsave {d8, d9, d10}
+; LE-I64-NEXT:    vpush {d8, d9, d10}
+; LE-I64-NEXT:    vmov r0, s1
+; LE-I64-NEXT:    vmov.f32 s16, s3
+; LE-I64-NEXT:    vmov.f32 s20, s2
+; LE-I64-NEXT:    vmov.f32 s18, s0
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    mov r5, r0
+; LE-I64-NEXT:    vmov r0, s18
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    mov r7, r0
+; LE-I64-NEXT:    vmov r0, s16
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov s0, r7
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    vmov r0, s20
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    vmov.32 d13[0], r5
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    vmov.32 d13[1], r4
+; LE-I64-NEXT:    vmov.32 d9[1], r6
+; LE-I64-NEXT:    vmov.32 d12[1], r7
+; LE-I64-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEXT:    vorr q0, q6, q6
+; LE-I64-NEXT:    vorr q1, q4, q4
+; LE-I64-NEXT:    vpop {d8, d9, d10}
+; LE-I64-NEXT:    vpop {d12, d13}
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+;
+; BE-I32-LABEL: lrint_v4f16:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r4, r5, r11, lr}
+; BE-I32-NEXT:    push {r4, r5, r11, lr}
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11}
+; BE-I32-NEXT:    vmov r0, s3
+; BE-I32-NEXT:    vmov.f32 s16, s2
+; BE-I32-NEXT:    vmov.f32 s18, s1
+; BE-I32-NEXT:    vmov.f32 s20, s0
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    vmov r0, s16
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r5, r0
+; BE-I32-NEXT:    vmov r0, s20
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r5
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEXT:    vmov r0, s18
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    vmov.32 d11[1], r4
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q5
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11}
+; BE-I32-NEXT:    pop {r4, r5, r11, pc}
+;
+; BE-I64-LABEL: lrint_v4f16:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEXT:    .vsave {d8, d9, d10}
+; BE-I64-NEXT:    vpush {d8, d9, d10}
+; BE-I64-NEXT:    vmov r0, s1
+; BE-I64-NEXT:    vmov.f32 s16, s3
+; BE-I64-NEXT:    vmov.f32 s18, s2
+; BE-I64-NEXT:    vmov.f32 s20, s0
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    mov r5, r0
+; BE-I64-NEXT:    vmov r0, s20
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r7, r0
+; BE-I64-NEXT:    vmov r0, s16
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov s0, r7
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    vmov r0, s18
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    vmov.32 d9[0], r5
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d9[1], r4
+; BE-I64-NEXT:    vmov.32 d8[1], r6
+; BE-I64-NEXT:    vmov.32 d10[1], r7
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d1, d9
+; BE-I64-NEXT:    vrev64.32 d3, d8
+; BE-I64-NEXT:    vrev64.32 d0, d10
+; BE-I64-NEXT:    vrev64.32 d2, d16
+; BE-I64-NEXT:    vpop {d8, d9, d10}
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
+  ret <4 x iXLen> %a
+}
 
-; define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
-;   %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
-;   ret <8 x iXLen> %a
-; }
+define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
+; LE-I32-LABEL: lrint_v8f16:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; LE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT:    vmov r0, s7
+; LE-I32-NEXT:    vmov.f32 s18, s6
+; LE-I32-NEXT:    vmov.f32 s16, s5
+; LE-I32-NEXT:    vmov.f32 s20, s4
+; LE-I32-NEXT:    vmov.f32 s22, s3
+; LE-I32-NEXT:    vmov.f32 s24, s2
+; LE-I32-NEXT:    vmov.f32 s26, s1
+; LE-I32-NEXT:    vmov.f32 s28, s0
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    mov r8, r0
+; LE-I32-NEXT:    vmov r0, s26
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r9, r0
+; LE-I32-NEXT:    vmov r0, s22
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r6, r0
+; LE-I32-NEXT:    vmov r0, s28
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r7, r0
+; LE-I32-NEXT:    vmov r0, s24
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    vmov r0, s18
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r5, r0
+; LE-I32-NEXT:    vmov r0, s20
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r5
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r4
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r7
+; LE-I32-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r6
+; LE-I32-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r9
+; LE-I32-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEXT:    vmov r0, s16
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    vmov.32 d11[1], r8
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    vorr q0, q6, q6
+; LE-I32-NEXT:    vorr q1, q5, q5
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; LE-I64-LABEL: lrint_v8f16:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    .pad #4
+; LE-I64-NEXT:    sub sp, sp, #4
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    .pad #8
+; LE-I64-NEXT:    sub sp, sp, #8
+; LE-I64-NEXT:    vmov r0, s1
+; LE-I64-NEXT:    vstr s6, [sp, #4] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.f32 s16, s7
+; LE-I64-NEXT:    vmov.f32 s18, s5
+; LE-I64-NEXT:    vmov.f32 s20, s4
+; LE-I64-NEXT:    vmov.f32 s22, s3
+; LE-I64-NEXT:    vmov.f32 s24, s2
+; LE-I64-NEXT:    vmov.f32 s26, s0
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    mov r9, r0
+; LE-I64-NEXT:    vmov r0, s26
+; LE-I64-NEXT:    str r1, [sp] @ 4-byte Spill
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    mov r10, r0
+; LE-I64-NEXT:    vmov r0, s22
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    mov r5, r0
+; LE-I64-NEXT:    vmov r0, s24
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    mov r7, r0
+; LE-I64-NEXT:    vmov r0, s18
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    mov r6, r0
+; LE-I64-NEXT:    vmov r0, s20
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    mov r4, r0
+; LE-I64-NEXT:    vmov r0, s16
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov s0, r4
+; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov s0, r6
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov s0, r7
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov s0, r5
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov s0, r10
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vldr s0, [sp, #4] @ 4-byte Reload
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    vmov r0, s0
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    vmov.32 d9[0], r9
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d15[1], r5
+; LE-I64-NEXT:    vmov.32 d9[1], r0
+; LE-I64-NEXT:    vmov.32 d13[1], r6
+; LE-I64-NEXT:    vmov.32 d11[1], r11
+; LE-I64-NEXT:    vmov.32 d8[1], r4
+; LE-I64-NEXT:    vmov.32 d14[1], r7
+; LE-I64-NEXT:    vorr q0, q4, q4
+; LE-I64-NEXT:    vmov.32 d12[1], r8
+; LE-I64-NEXT:    vorr q1, q7, q7
+; LE-I64-NEXT:    vmov.32 d10[1], r1
+; LE-I64-NEXT:    vorr q2, q6, q6
+; LE-I64-NEXT:    vorr q3, q5, q5
+; LE-I64-NEXT:    add sp, sp, #8
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    add sp, sp, #4
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v8f16:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; BE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT:    vmov r0, s1
+; BE-I32-NEXT:    vmov.f32 s18, s7
+; BE-I32-NEXT:    vmov.f32 s20, s6
+; BE-I32-NEXT:    vmov.f32 s16, s5
+; BE-I32-NEXT:    vmov.f32 s22, s4
+; BE-I32-NEXT:    vmov.f32 s24, s3
+; BE-I32-NEXT:    vmov.f32 s26, s2
+; BE-I32-NEXT:    vmov.f32 s28, s0
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    mov r8, r0
+; BE-I32-NEXT:    vmov r0, s24
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r9, r0
+; BE-I32-NEXT:    vmov r0, s18
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r6, r0
+; BE-I32-NEXT:    vmov r0, s26
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r7, r0
+; BE-I32-NEXT:    vmov r0, s20
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    vmov r0, s28
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r5, r0
+; BE-I32-NEXT:    vmov r0, s22
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r5
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r4
+; BE-I32-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r7
+; BE-I32-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r6
+; BE-I32-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r9
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEXT:    vmov r0, s16
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    vmov.32 d12[1], r8
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q6
+; BE-I32-NEXT:    vrev64.32 q1, q5
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; BE-I64-LABEL: lrint_v8f16:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    .pad #4
+; BE-I64-NEXT:    sub sp, sp, #4
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT:    .pad #8
+; BE-I64-NEXT:    sub sp, sp, #8
+; BE-I64-NEXT:    vmov r0, s1
+; BE-I64-NEXT:    vmov.f32 s18, s7
+; BE-I64-NEXT:    vmov.f32 s16, s6
+; BE-I64-NEXT:    vmov.f32 s20, s5
+; BE-I64-NEXT:    vmov.f32 s22, s4
+; BE-I64-NEXT:    vmov.f32 s24, s3
+; BE-I64-NEXT:    vmov.f32 s26, s2
+; BE-I64-NEXT:    vmov.f32 s28, s0
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    mov r9, r0
+; BE-I64-NEXT:    vmov r0, s28
+; BE-I64-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r10, r0
+; BE-I64-NEXT:    vmov r0, s24
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r5, r0
+; BE-I64-NEXT:    vmov r0, s26
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r7, r0
+; BE-I64-NEXT:    vmov r0, s20
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r6, r0
+; BE-I64-NEXT:    vmov r0, s22
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r4, r0
+; BE-I64-NEXT:    vmov r0, s18
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov s0, r4
+; BE-I64-NEXT:    mov r11, r1
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov s0, r6
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov s0, r7
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov s0, r5
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov s0, r10
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    vmov r0, s16
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    vmov.32 d8[0], r9
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; BE-I64-NEXT:    vmov.32 d13[1], r5
+; BE-I64-NEXT:    vmov.32 d8[1], r0
+; BE-I64-NEXT:    vmov.32 d11[1], r6
+; BE-I64-NEXT:    vmov.32 d9[1], r11
+; BE-I64-NEXT:    vmov.32 d14[1], r4
+; BE-I64-NEXT:    vmov.32 d12[1], r7
+; BE-I64-NEXT:    vmov.32 d10[1], r8
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d1, d8
+; BE-I64-NEXT:    vrev64.32 d3, d13
+; BE-I64-NEXT:    vrev64.32 d5, d11
+; BE-I64-NEXT:    vrev64.32 d7, d9
+; BE-I64-NEXT:    vrev64.32 d0, d14
+; BE-I64-NEXT:    vrev64.32 d2, d12
+; BE-I64-NEXT:    vrev64.32 d4, d10
+; BE-I64-NEXT:    vrev64.32 d6, d16
+; BE-I64-NEXT:    add sp, sp, #8
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT:    add sp, sp, #4
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
+  ret <8 x iXLen> %a
+}
 
-; define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
-;   %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
-;   ret <16 x iXLen> %a
-; }
+define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
+; LE-I32-LABEL: lrint_v16f16:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    .pad #8
+; LE-I32-NEXT:    sub sp, sp, #8
+; LE-I32-NEXT:    vmov r0, s15
+; LE-I32-NEXT:    vstr s13, [sp, #4] @ 4-byte Spill
+; LE-I32-NEXT:    vmov.f32 s26, s14
+; LE-I32-NEXT:    vstr s0, [sp] @ 4-byte Spill
+; LE-I32-NEXT:    vmov.f32 s20, s12
+; LE-I32-NEXT:    vmov.f32 s22, s11
+; LE-I32-NEXT:    vmov.f32 s18, s10
+; LE-I32-NEXT:    vmov.f32 s17, s9
+; LE-I32-NEXT:    vmov.f32 s24, s8
+; LE-I32-NEXT:    vmov.f32 s19, s7
+; LE-I32-NEXT:    vmov.f32 s30, s6
+; LE-I32-NEXT:    vmov.f32 s21, s5
+; LE-I32-NEXT:    vmov.f32 s16, s4
+; LE-I32-NEXT:    vmov.f32 s23, s3
+; LE-I32-NEXT:    vmov.f32 s28, s2
+; LE-I32-NEXT:    vmov.f32 s25, s1
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    mov r8, r0
+; LE-I32-NEXT:    vmov r0, s17
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r9, r0
+; LE-I32-NEXT:    vmov r0, s22
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r10, r0
+; LE-I32-NEXT:    vmov r0, s21
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r7, r0
+; LE-I32-NEXT:    vmov r0, s19
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    vmov r0, s25
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r5, r0
+; LE-I32-NEXT:    vmov r0, s23
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r6, r0
+; LE-I32-NEXT:    vmov r0, s20
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    vmov r0, s26
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    vmov r0, s24
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEXT:    vmov r0, s18
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEXT:    vmov r0, s16
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    vmov r0, s30
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    vmov r0, s28
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vldr s0, [sp] @ 4-byte Reload
+; LE-I32-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEXT:    vmov r0, s0
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r6
+; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r5
+; LE-I32-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r4
+; LE-I32-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r7
+; LE-I32-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r10
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r9
+; LE-I32-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vldr s0, [sp, #4] @ 4-byte Reload
+; LE-I32-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEXT:    vmov r0, s0
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    vmov.32 d11[1], r8
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    vorr q0, q7, q7
+; LE-I32-NEXT:    vorr q1, q4, q4
+; LE-I32-NEXT:    vorr q2, q6, q6
+; LE-I32-NEXT:    vorr q3, q5, q5
+; LE-I32-NEXT:    add sp, sp, #8
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I64-LABEL: lrint_v16f16:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    .pad #4
+; LE-I64-NEXT:    sub sp, sp, #4
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    .pad #120
+; LE-I64-NEXT:    sub sp, sp, #120
+; LE-I64-NEXT:    mov r11, r0
+; LE-I64-NEXT:    vmov r0, s7
+; LE-I64-NEXT:    vstr s15, [sp, #24] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.f32 s23, s13
+; LE-I64-NEXT:    vstr s14, [sp, #100] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.f32 s25, s12
+; LE-I64-NEXT:    vmov.f32 s27, s11
+; LE-I64-NEXT:    vstr s10, [sp, #104] @ 4-byte Spill
+; LE-I64-NEXT:    vstr s9, [sp, #108] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.f32 s24, s8
+; LE-I64-NEXT:    vmov.f32 s19, s6
+; LE-I64-NEXT:    vmov.f32 s29, s5
+; LE-I64-NEXT:    vmov.f32 s17, s4
+; LE-I64-NEXT:    vmov.f32 s16, s3
+; LE-I64-NEXT:    vmov.f32 s21, s2
+; LE-I64-NEXT:    vmov.f32 s26, s1
+; LE-I64-NEXT:    vmov.f32 s18, s0
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    mov r7, r0
+; LE-I64-NEXT:    vmov r0, s25
+; LE-I64-NEXT:    str r1, [sp, #56] @ 4-byte Spill
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    mov r5, r0
+; LE-I64-NEXT:    vmov r0, s27
+; LE-I64-NEXT:    str r1, [sp, #116] @ 4-byte Spill
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    mov r6, r0
+; LE-I64-NEXT:    vmov r0, s29
+; LE-I64-NEXT:    str r1, [sp, #112] @ 4-byte Spill
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    vmov r0, s23
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    add lr, sp, #80
+; LE-I64-NEXT:    vmov.32 d17[0], r6
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    mov r6, r0
+; LE-I64-NEXT:    vmov r0, s17
+; LE-I64-NEXT:    vmov r8, s21
+; LE-I64-NEXT:    str r1, [sp, #76] @ 4-byte Spill
+; LE-I64-NEXT:    vmov r10, s19
+; LE-I64-NEXT:    vmov.32 d10[0], r5
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    vmov.32 d11[0], r6
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    mov r0, r10
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    vmov.32 d11[0], r7
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    mov r0, r8
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    mov r6, r0
+; LE-I64-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d11[1], r0
+; LE-I64-NEXT:    vmov r0, s18
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    mov r5, r0
+; LE-I64-NEXT:    vmov r0, s16
+; LE-I64-NEXT:    vmov.32 d10[1], r7
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    vmov.32 d15[1], r4
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    vmov r0, s26
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vmov r8, s24
+; LE-I64-NEXT:    vmov.32 d14[1], r9
+; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    vmov s24, r5
+; LE-I64-NEXT:    vldr s0, [sp, #24] @ 4-byte Reload
+; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT:    vmov r7, s0
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov.f32 s0, s24
+; LE-I64-NEXT:    vmov s22, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s22
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    vmov s24, r6
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    mov r0, r7
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov.f32 s0, s24
+; LE-I64-NEXT:    vmov s22, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s22
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d15[1], r6
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    mov r0, r8
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vldr s0, [sp, #100] @ 4-byte Reload
+; LE-I64-NEXT:    mov r7, r0
+; LE-I64-NEXT:    vmov.32 d14[1], r5
+; LE-I64-NEXT:    vmov r0, s0
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vldr s0, [sp, #104] @ 4-byte Reload
+; LE-I64-NEXT:    vmov s20, r0
+; LE-I64-NEXT:    vmov.32 d13[1], r6
+; LE-I64-NEXT:    vmov r4, s0
+; LE-I64-NEXT:    vldr s0, [sp, #108] @ 4-byte Reload
+; LE-I64-NEXT:    vmov r0, s0
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov.f32 s0, s20
+; LE-I64-NEXT:    vmov s16, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s16
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    vmov s18, r7
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    mov r0, r4
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov.f32 s0, s18
+; LE-I64-NEXT:    vmov s16, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s16
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d11[1], r6
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #80
+; LE-I64-NEXT:    vmov.32 d10[1], r4
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    vmov.32 d16[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; LE-I64-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vmov.32 d19[1], r0
+; LE-I64-NEXT:    ldr r0, [sp, #116] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d21[1], r10
+; LE-I64-NEXT:    vmov.32 d18[1], r0
+; LE-I64-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d12[1], r5
+; LE-I64-NEXT:    vmov.32 d17[1], r0
+; LE-I64-NEXT:    add r0, r11, #64
+; LE-I64-NEXT:    vmov.32 d16[1], r1
+; LE-I64-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT:    vmov.32 d20[1], r9
+; LE-I64-NEXT:    vst1.64 {d12, d13}, [r0:128]
+; LE-I64-NEXT:    vst1.64 {d14, d15}, [r11:128]!
+; LE-I64-NEXT:    vst1.64 {d20, d21}, [r11:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r11:128]
+; LE-I64-NEXT:    add sp, sp, #120
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    add sp, sp, #4
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v16f16:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    .pad #16
+; BE-I32-NEXT:    sub sp, sp, #16
+; BE-I32-NEXT:    vmov r0, s1
+; BE-I32-NEXT:    vstr s14, [sp, #4] @ 4-byte Spill
+; BE-I32-NEXT:    vmov.f32 s30, s15
+; BE-I32-NEXT:    vstr s13, [sp, #12] @ 4-byte Spill
+; BE-I32-NEXT:    vmov.f32 s17, s12
+; BE-I32-NEXT:    vstr s10, [sp, #8] @ 4-byte Spill
+; BE-I32-NEXT:    vmov.f32 s19, s11
+; BE-I32-NEXT:    vstr s8, [sp] @ 4-byte Spill
+; BE-I32-NEXT:    vmov.f32 s21, s9
+; BE-I32-NEXT:    vmov.f32 s23, s7
+; BE-I32-NEXT:    vmov.f32 s24, s6
+; BE-I32-NEXT:    vmov.f32 s25, s5
+; BE-I32-NEXT:    vmov.f32 s26, s4
+; BE-I32-NEXT:    vmov.f32 s27, s3
+; BE-I32-NEXT:    vmov.f32 s28, s2
+; BE-I32-NEXT:    vmov.f32 s29, s0
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    mov r8, r0
+; BE-I32-NEXT:    vmov r0, s27
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r9, r0
+; BE-I32-NEXT:    vmov r0, s25
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r10, r0
+; BE-I32-NEXT:    vmov r0, s23
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r7, r0
+; BE-I32-NEXT:    vmov r0, s21
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    vmov r0, s19
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r5, r0
+; BE-I32-NEXT:    vmov r0, s30
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r6, r0
+; BE-I32-NEXT:    vmov r0, s17
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    vmov r0, s29
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    vmov r0, s28
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEXT:    vmov r0, s26
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d14[0], r0
+; BE-I32-NEXT:    vmov r0, s24
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vldr s0, [sp] @ 4-byte Reload
+; BE-I32-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEXT:    vmov r0, s0
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vldr s0, [sp, #4] @ 4-byte Reload
+; BE-I32-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEXT:    vmov r0, s0
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vldr s0, [sp, #8] @ 4-byte Reload
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    vmov r0, s0
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r6
+; BE-I32-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r5
+; BE-I32-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r4
+; BE-I32-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r7
+; BE-I32-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r10
+; BE-I32-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r9
+; BE-I32-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vldr s0, [sp, #12] @ 4-byte Reload
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    vmov r0, s0
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    vmov.32 d10[1], r8
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q5
+; BE-I32-NEXT:    vrev64.32 q1, q7
+; BE-I32-NEXT:    vrev64.32 q2, q6
+; BE-I32-NEXT:    vrev64.32 q3, q4
+; BE-I32-NEXT:    add sp, sp, #16
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I64-LABEL: lrint_v16f16:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    .pad #4
+; BE-I64-NEXT:    sub sp, sp, #4
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    .pad #112
+; BE-I64-NEXT:    sub sp, sp, #112
+; BE-I64-NEXT:    mov r11, r0
+; BE-I64-NEXT:    vmov r0, s14
+; BE-I64-NEXT:    vmov.f32 s17, s15
+; BE-I64-NEXT:    vstr s13, [sp, #52] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.f32 s21, s12
+; BE-I64-NEXT:    vstr s10, [sp, #68] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.f32 s23, s11
+; BE-I64-NEXT:    vstr s7, [sp, #72] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.f32 s19, s9
+; BE-I64-NEXT:    vstr s4, [sp, #28] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.f32 s26, s8
+; BE-I64-NEXT:    vmov.f32 s24, s6
+; BE-I64-NEXT:    vmov.f32 s18, s5
+; BE-I64-NEXT:    vmov.f32 s25, s3
+; BE-I64-NEXT:    vmov.f32 s16, s2
+; BE-I64-NEXT:    vmov.f32 s27, s1
+; BE-I64-NEXT:    vmov.f32 s29, s0
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    mov r8, r0
+; BE-I64-NEXT:    vmov r0, s29
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r9, r0
+; BE-I64-NEXT:    vmov r0, s27
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r7, r0
+; BE-I64-NEXT:    vmov r0, s21
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r6, r0
+; BE-I64-NEXT:    vmov r0, s25
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r5, r0
+; BE-I64-NEXT:    vmov r0, s23
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov s0, r5
+; BE-I64-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; BE-I64-NEXT:    vstr d16, [sp, #96] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov s0, r6
+; BE-I64-NEXT:    str r1, [sp, #92] @ 4-byte Spill
+; BE-I64-NEXT:    vstr d16, [sp, #80] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov s0, r7
+; BE-I64-NEXT:    str r1, [sp, #76] @ 4-byte Spill
+; BE-I64-NEXT:    vstr d16, [sp, #56] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov s0, r9
+; BE-I64-NEXT:    mov r10, r1
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    vmov r0, s17
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    vmov.32 d10[0], r8
+; BE-I64-NEXT:    vmov r6, s19
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    mov r0, r6
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r6, r0
+; BE-I64-NEXT:    vmov r0, s18
+; BE-I64-NEXT:    vmov.32 d10[1], r4
+; BE-I64-NEXT:    vstr d10, [sp, #40] @ 8-byte Spill
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r4, r0
+; BE-I64-NEXT:    vmov r0, s16
+; BE-I64-NEXT:    vmov.32 d11[1], r7
+; BE-I64-NEXT:    vstr d11, [sp, #32] @ 8-byte Spill
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov.32 d15[1], r5
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    vstr d15, [sp, #16] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vldr s0, [sp, #28] @ 4-byte Reload
+; BE-I64-NEXT:    vmov r5, s26
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov s26, r4
+; BE-I64-NEXT:    vmov r0, s0
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vmov.32 d14[1], r10
+; BE-I64-NEXT:    vmov r4, s24
+; BE-I64-NEXT:    vstr d16, [sp] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d14, [sp, #8] @ 8-byte Spill
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov.f32 s0, s26
+; BE-I64-NEXT:    vmov s22, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s22
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    vmov s24, r6
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    mov r0, r4
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov.f32 s0, s24
+; BE-I64-NEXT:    vmov s22, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s22
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    vmov.32 d14[1], r6
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    mov r0, r5
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vldr s0, [sp, #52] @ 4-byte Reload
+; BE-I64-NEXT:    mov r4, r0
+; BE-I64-NEXT:    vmov.32 d13[1], r7
+; BE-I64-NEXT:    vmov r0, s0
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vldr s0, [sp, #68] @ 4-byte Reload
+; BE-I64-NEXT:    vmov s20, r0
+; BE-I64-NEXT:    vmov.32 d11[1], r6
+; BE-I64-NEXT:    vmov r7, s0
+; BE-I64-NEXT:    vldr s0, [sp, #72] @ 4-byte Reload
+; BE-I64-NEXT:    vmov r0, s0
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov.f32 s0, s20
+; BE-I64-NEXT:    vmov s16, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    vmov s18, r4
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    mov r0, r7
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov.f32 s0, s18
+; BE-I64-NEXT:    vmov s16, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    vmov.32 d15[1], r4
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d24[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; BE-I64-NEXT:    vldr d23, [sp, #56] @ 8-byte Reload
+; BE-I64-NEXT:    vldr d20, [sp, #8] @ 8-byte Reload
+; BE-I64-NEXT:    vmov.32 d23[1], r0
+; BE-I64-NEXT:    ldr r0, [sp, #92] @ 4-byte Reload
+; BE-I64-NEXT:    vldr d22, [sp, #80] @ 8-byte Reload
+; BE-I64-NEXT:    vldr d26, [sp, #16] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d21, d20
+; BE-I64-NEXT:    vmov.32 d22[1], r0
+; BE-I64-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; BE-I64-NEXT:    vldr d30, [sp] @ 8-byte Reload
+; BE-I64-NEXT:    vldr d25, [sp, #96] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d20, d26
+; BE-I64-NEXT:    vldr d26, [sp, #32] @ 8-byte Reload
+; BE-I64-NEXT:    vmov.32 d10[1], r5
+; BE-I64-NEXT:    vmov.32 d12[1], r9
+; BE-I64-NEXT:    vldr d28, [sp, #40] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d27, d26
+; BE-I64-NEXT:    vmov.32 d25[1], r0
+; BE-I64-NEXT:    add r0, r11, #64
+; BE-I64-NEXT:    vmov.32 d30[1], r8
+; BE-I64-NEXT:    vmov.32 d9[1], r6
+; BE-I64-NEXT:    vrev64.32 d26, d28
+; BE-I64-NEXT:    vrev64.32 d29, d10
+; BE-I64-NEXT:    vmov.32 d24[1], r1
+; BE-I64-NEXT:    vrev64.32 d1, d12
+; BE-I64-NEXT:    vrev64.32 d28, d23
+; BE-I64-NEXT:    vrev64.32 d23, d22
+; BE-I64-NEXT:    vrev64.32 d22, d30
+; BE-I64-NEXT:    vrev64.32 d31, d25
+; BE-I64-NEXT:    vrev64.32 d0, d9
+; BE-I64-NEXT:    vrev64.32 d30, d24
+; BE-I64-NEXT:    vst1.64 {d0, d1}, [r0:128]!
+; BE-I64-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEXT:    vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEXT:    vrev64.32 d19, d13
+; BE-I64-NEXT:    vst1.64 {d26, d27}, [r0:128]
+; BE-I64-NEXT:    vst1.64 {d20, d21}, [r11:128]!
+; BE-I64-NEXT:    vrev64.32 d18, d14
+; BE-I64-NEXT:    vst1.64 {d22, d23}, [r11:128]!
+; BE-I64-NEXT:    vrev64.32 d17, d15
+; BE-I64-NEXT:    vrev64.32 d16, d11
+; BE-I64-NEXT:    vst1.64 {d18, d19}, [r11:128]!
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r11:128]
+; BE-I64-NEXT:    add sp, sp, #112
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    add sp, sp, #4
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
+  ret <16 x iXLen> %a
+}
 
 define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 ; LE-I32-LABEL: lrint_v1f32:
diff --git a/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir b/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
index 8fa9337..03cb8e3 100644
--- a/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
+++ b/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
@@ -60,9 +60,9 @@ body:             |
     $sp = t2STMDB_UPD $sp, 14, $noreg, $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11
     $r4 = t2BICri $r4, 1, 14, $noreg, $noreg
     $sp = tSUBspi $sp, 34, 14, $noreg
-    VLSTM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit undef $vpr, implicit undef $fpscr, implicit undef $fpscr_nzcv, implicit undef $d0, implicit undef $d1, implicit undef $d2, implicit undef $d3, implicit undef $d4, implicit undef $d5, implicit undef $d6, implicit undef $d7, implicit $d8, implicit $d9, implicit $d10, implicit $d11, implicit $d12, implicit $d13, implicit $d14, implicit $d15
+    VLSTM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit-def $fpscr_rm, implicit undef $vpr, implicit undef $fpscr, implicit undef $fpscr_nzcv, implicit undef $fpscr_rm, implicit undef $d0, implicit undef $d1, implicit undef $d2, implicit undef $d3, implicit undef $d4, implicit undef $d5, implicit undef $d6, implicit undef $d7, implicit $d8, implicit $d9, implicit $d10, implicit $d11, implicit $d12, implicit $d13, implicit $d14, implicit $d15
     tBLXNSr 14, $noreg, killed $r4, csr_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $q0, implicit-def $q1, implicit-def $q2, implicit-def $q3, implicit-def $q4, implicit-def $q5, implicit-def $q6, implicit-def $q7
-    VLLDM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit-def $d0, implicit-def $d1, implicit-def $d2, implicit-def $d3, implicit-def $d4, implicit-def $d5, implicit-def $d6, implicit-def $d7, implicit-def $d8, implicit-def $d9, implicit-def $d10, implicit-def $d11, implicit-def $d12, implicit-def $d13, implicit-def $d14, implicit-def $d15
+    VLLDM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit-def $fpscr_rm, implicit-def $d0, implicit-def $d1, implicit-def $d2, implicit-def $d3, implicit-def $d4, implicit-def $d5, implicit-def $d6, implicit-def $d7, implicit-def $d8, implicit-def $d9, implicit-def $d10, implicit-def $d11, implicit-def $d12, implicit-def $d13, implicit-def $d14, implicit-def $d15
     $sp = tADDspi $sp, 34, 14, $noreg
     $sp = t2LDMIA_UPD $sp, 14, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11
     $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $pc
diff --git a/llvm/test/CodeGen/BPF/addr-space-memintrinsic-gep.ll b/llvm/test/CodeGen/BPF/addr-space-memintrinsic-gep.ll
new file mode 100644
index 0000000..1db8391
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-memintrinsic-gep.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+@page1 = dso_local local_unnamed_addr addrspace(1) global [10 x ptr] zeroinitializer, align 8
+@page2 = dso_local local_unnamed_addr addrspace(1) global [10 x ptr] zeroinitializer, align 8
+
+define dso_local void @test_memset() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memset() local_unnamed_addr {
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 16) to ptr), i8 0, i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 16), i8 0, i64 16, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p1.i64(ptr addrspace(1) writeonly captures(none), i8, i64, i1 immarg)
+
+define dso_local void @test_memcpy() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memcpy() local_unnamed_addr {
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8) to ptr), ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 8) to ptr), i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8), ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 8), i64 16, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias writeonly captures(none), ptr addrspace(1) noalias readonly captures(none), i64, i1 immarg)
+
+define dso_local void @test_memmove() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memmove() local_unnamed_addr {
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 16) to ptr), ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8) to ptr), i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 16), ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8), i64 16, i1 false)
+  ret void
+}
+
+declare void @llvm.memmove.p1.p1.i64(ptr addrspace(1) writeonly captures(none), ptr addrspace(1) readonly captures(none), i64, i1 immarg)
+
+define dso_local void @test_memset_inline() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memset_inline() local_unnamed_addr {
+; CHECK-NEXT:    call void @llvm.memset.inline.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 16) to ptr), i8 0, i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memset.inline.p1.i64(ptr addrspace(1) nonnull align 8 getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 16), i8 0, i64 16, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.inline.p1.i64(ptr addrspace(1) writeonly captures(none), i8, i64, i1 immarg)
+
+define dso_local void @test_memcpy_inline() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memcpy_inline() local_unnamed_addr {
+; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8) to ptr), ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 8) to ptr), i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) nonnull align 8 getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8), ptr addrspace(1) nonnull align 8 getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 8), i64 16, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noalias writeonly captures(none), ptr addrspace(1) noalias readonly captures(none), i64, i1 immarg)
diff --git a/llvm/test/CodeGen/BPF/addr-space-memintrinsic-no-gep.ll b/llvm/test/CodeGen/BPF/addr-space-memintrinsic-no-gep.ll
new file mode 100644
index 0000000..62fa2e4
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-memintrinsic-no-gep.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+@page1 = dso_local local_unnamed_addr addrspace(1) global [10 x ptr] zeroinitializer, align 8
+@page2 = dso_local local_unnamed_addr addrspace(1) global [10 x ptr] zeroinitializer, align 8
+
+define dso_local void @test_memset() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memset() local_unnamed_addr {
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) @page1 to ptr), i8 0, i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef align 8 dereferenceable(16) @page1, i8 0, i64 16, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p1.i64(ptr addrspace(1) writeonly captures(none), i8, i64, i1 immarg)
+
+define dso_local void @test_memcpy() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memcpy() local_unnamed_addr {
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) @page2 to ptr), ptr align 8 addrspacecast (ptr addrspace(1) @page1 to ptr), i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef align 8 dereferenceable(16) @page2, ptr addrspace(1) noundef align 8 dereferenceable(16) @page1, i64 16, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias writeonly captures(none), ptr addrspace(1) noalias readonly captures(none), i64, i1 immarg)
+
+define dso_local void @test_memset_inline() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memset_inline() local_unnamed_addr {
+; CHECK-NEXT:    call void @llvm.memset.inline.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) @page1 to ptr), i8 0, i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memset.inline.p1.i64(ptr addrspace(1) align 8 @page1, i8 0, i64 16, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.inline.p1.i64(ptr addrspace(1) writeonly captures(none), i8, i64, i1 immarg)
+
+define dso_local void @test_memcpy_inline() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memcpy_inline() local_unnamed_addr {
+; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) @page2 to ptr), ptr align 8 addrspacecast (ptr addrspace(1) @page1 to ptr), i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) align 8 @page2, ptr addrspace(1) align 8 @page1, i64 16, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noalias writeonly captures(none), ptr addrspace(1) noalias readonly captures(none), i64, i1 immarg)
diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-Invalid-Flag-LargeNumber.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-Invalid-Flag-LargeNumber.ll
new file mode 100644
index 0000000..c27c87f
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-Invalid-Flag-LargeNumber.ll
@@ -0,0 +1,20 @@
+; RUN: not opt -passes='print<dxil-root-signature>' %s -S -o - 2>&1 | FileCheck %s
+
+target triple = "dxil-unknown-shadermodel6.0-compute"
+
+; CHECK: error: Invalid value for DescriptorFlag: 66666
+; CHECK-NOT: Root Signature Definitions
+
+define void @main() #0 {
+entry:
+  ret void
+}
+attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
+
+
+!dx.rootsignatures = !{!2} ; list of function/root signature pairs
+!2 = !{ ptr @main, !3, i32 2 } ; function, root signature
+!3 = !{ !5 } ; list of root signature elements
+!5 = !{ !"DescriptorTable", i32 0, !6, !7 }
+!6 = !{ !"SRV", i32 1, i32 1, i32 0, i32 -1, i32 66666 }
+!7 = !{ !"UAV", i32 5, i32 1, i32 10, i32 5, i32 2 }
diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor-Invalid-Flags-LargeNumber.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor-Invalid-Flags-LargeNumber.ll
new file mode 100644
index 0000000..898e197
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor-Invalid-Flags-LargeNumber.ll
@@ -0,0 +1,18 @@
+; RUN: not opt -passes='print<dxil-root-signature>' %s -S -o - 2>&1 | FileCheck %s
+
+target triple = "dxil-unknown-shadermodel6.0-compute"
+
+
+; CHECK: error: Invalid value for RootDescriptorFlag: 666
+; CHECK-NOT: Root Signature Definitions
+define void @main() #0 {
+entry:
+  ret void
+}
+attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
+
+
+!dx.rootsignatures = !{!2} ; list of function/root signature pairs
+!2 = !{ ptr @main, !3, i32 2 } ; function, root signature
+!3 = !{ !5 } ; list of root signature elements
+!5 = !{ !"RootCBV", i32 0, i32 1, i32 2, i32 666  }
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_2D_vocab.json b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_2D_vocab.json
new file mode 100644
index 0000000..2894fff
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_2D_vocab.json
@@ -0,0 +1,11 @@
+{
+    "entities" : {
+        "ABS_Fp":[1, 2],
+        "ADC":[3, 4],
+        "ADD":[5, 6],
+        "ADDPDrm":[7, 8],
+        "ADDPDrr":[9, 10],
+        "ADDPSrr":[11, 12],
+        "ADDSDrm":[13, 14]
+    }
+}
+\ No newline at end of file
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_inconsistent_dims.json b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_inconsistent_dims.json
new file mode 100644
index 0000000..bf04163
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_inconsistent_dims.json
@@ -0,0 +1,7 @@
+{
+    "entities": {
+        "ADD": [1.0, 2.0, 3.0],
+        "SUB": [1.5],
+        "MUL": [2.0, 3.0]
+    }
+}
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_invalid_vocab.json b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_invalid_vocab.json
new file mode 100644
index 0000000..585a85e
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_invalid_vocab.json
@@ -0,0 +1,5 @@
+{
+    "invalid_structure": {
+        "ADD": [ 1, 2, 3]
+    }
+}
+\ No newline at end of file
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_zero_vocab.json b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_zero_vocab.json
new file mode 100644
index 0000000..63e8ccbd
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_zero_vocab.json
@@ -0,0 +1,12 @@
+{
+    "entities": {
+        "ADD": [],
+        "SUB": [],
+        "MUL": [],
+        "MOV": [],
+        "CMP": [],
+        "JMP": [],
+        "CALL": [],
+        "RET": []
+    }
+}
+\ No newline at end of file
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt
new file mode 100644
index 0000000..6327cff
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt
@@ -0,0 +1,6882 @@
+Key: AAA:  [ 0.00  0.00 ]
+Key: AAD:  [ 0.00  0.00 ]
+Key: AADD:  [ 0.00  0.00 ]
+Key: AAM:  [ 0.00  0.00 ]
+Key: AAND:  [ 0.00  0.00 ]
+Key: AAS:  [ 0.00  0.00 ]
+Key: ABS_F:  [ 0.00  0.00 ]
+Key: ABS_Fp:  [ 1.00  2.00 ]
+Key: ADC:  [ 3.00  4.00 ]
+Key: ADCX:  [ 0.00  0.00 ]
+Key: ADD:  [ 5.00  6.00 ]
+Key: ADDPDrm:  [ 7.00  8.00 ]
+Key: ADDPDrr:  [ 9.00  10.00 ]
+Key: ADDPSrm:  [ 0.00  0.00 ]
+Key: ADDPSrr:  [ 11.00  12.00 ]
+Key: ADDR:  [ 0.00  0.00 ]
+Key: ADDSDrm:  [ 13.00  14.00 ]
+Key: ADDSDrm_Int:  [ 0.00  0.00 ]
+Key: ADDSDrr:  [ 0.00  0.00 ]
+Key: ADDSDrr_Int:  [ 0.00  0.00 ]
+Key: ADDSSrm:  [ 0.00  0.00 ]
+Key: ADDSSrm_Int:  [ 0.00  0.00 ]
+Key: ADDSSrr:  [ 0.00  0.00 ]
+Key: ADDSSrr_Int:  [ 0.00  0.00 ]
+Key: ADDSUBPDrm:  [ 0.00  0.00 ]
+Key: ADDSUBPDrr:  [ 0.00  0.00 ]
+Key: ADDSUBPSrm:  [ 0.00  0.00 ]
+Key: ADDSUBPSrr:  [ 0.00  0.00 ]
+Key: ADD_F:  [ 0.00  0.00 ]
+Key: ADD_FI:  [ 0.00  0.00 ]
+Key: ADD_FPrST:  [ 0.00  0.00 ]
+Key: ADD_FST:  [ 0.00  0.00 ]
+Key: ADD_Fp:  [ 0.00  0.00 ]
+Key: ADD_FpI:  [ 0.00  0.00 ]
+Key: ADD_FrST:  [ 0.00  0.00 ]
+Key: ADJCALLSTACKDOWN:  [ 0.00  0.00 ]
+Key: ADJCALLSTACKUP:  [ 0.00  0.00 ]
+Key: ADOX:  [ 0.00  0.00 ]
+Key: AESDEC:  [ 0.00  0.00 ]
+Key: AESDECLASTrm:  [ 0.00  0.00 ]
+Key: AESDECLASTrr:  [ 0.00  0.00 ]
+Key: AESDECWIDE:  [ 0.00  0.00 ]
+Key: AESDECrm:  [ 0.00  0.00 ]
+Key: AESDECrr:  [ 0.00  0.00 ]
+Key: AESENC:  [ 0.00  0.00 ]
+Key: AESENCLASTrm:  [ 0.00  0.00 ]
+Key: AESENCLASTrr:  [ 0.00  0.00 ]
+Key: AESENCWIDE:  [ 0.00  0.00 ]
+Key: AESENCrm:  [ 0.00  0.00 ]
+Key: AESENCrr:  [ 0.00  0.00 ]
+Key: AESIMCrm:  [ 0.00  0.00 ]
+Key: AESIMCrr:  [ 0.00  0.00 ]
+Key: AESKEYGENASSISTrmi:  [ 0.00  0.00 ]
+Key: AESKEYGENASSISTrri:  [ 0.00  0.00 ]
+Key: AND:  [ 0.00  0.00 ]
+Key: ANDN:  [ 0.00  0.00 ]
+Key: ANDNPDrm:  [ 0.00  0.00 ]
+Key: ANDNPDrr:  [ 0.00  0.00 ]
+Key: ANDNPSrm:  [ 0.00  0.00 ]
+Key: ANDNPSrr:  [ 0.00  0.00 ]
+Key: ANDPDrm:  [ 0.00  0.00 ]
+Key: ANDPDrr:  [ 0.00  0.00 ]
+Key: ANDPSrm:  [ 0.00  0.00 ]
+Key: ANDPSrr:  [ 0.00  0.00 ]
+Key: ANNOTATION_LABEL:  [ 0.00  0.00 ]
+Key: AOR:  [ 0.00  0.00 ]
+Key: ARITH_FENCE:  [ 0.00  0.00 ]
+Key: ARPL:  [ 0.00  0.00 ]
+Key: ASAN_CHECK_MEMACCESS:  [ 0.00  0.00 ]
+Key: AVX:  [ 0.00  0.00 ]
+Key: AVX_SET:  [ 0.00  0.00 ]
+Key: AXOR:  [ 0.00  0.00 ]
+Key: BEXTR:  [ 0.00  0.00 ]
+Key: BEXTRI:  [ 0.00  0.00 ]
+Key: BLCFILL:  [ 0.00  0.00 ]
+Key: BLCI:  [ 0.00  0.00 ]
+Key: BLCIC:  [ 0.00  0.00 ]
+Key: BLCMSK:  [ 0.00  0.00 ]
+Key: BLCS:  [ 0.00  0.00 ]
+Key: BLENDPDrmi:  [ 0.00  0.00 ]
+Key: BLENDPDrri:  [ 0.00  0.00 ]
+Key: BLENDPSrmi:  [ 0.00  0.00 ]
+Key: BLENDPSrri:  [ 0.00  0.00 ]
+Key: BLENDVPDrm:  [ 0.00  0.00 ]
+Key: BLENDVPDrr:  [ 0.00  0.00 ]
+Key: BLENDVPSrm:  [ 0.00  0.00 ]
+Key: BLENDVPSrr:  [ 0.00  0.00 ]
+Key: BLSFILL:  [ 0.00  0.00 ]
+Key: BLSI:  [ 0.00  0.00 ]
+Key: BLSIC:  [ 0.00  0.00 ]
+Key: BLSMSK:  [ 0.00  0.00 ]
+Key: BLSR:  [ 0.00  0.00 ]
+Key: BOUNDS:  [ 0.00  0.00 ]
+Key: BSF:  [ 0.00  0.00 ]
+Key: BSR:  [ 0.00  0.00 ]
+Key: BSWAP:  [ 0.00  0.00 ]
+Key: BT:  [ 0.00  0.00 ]
+Key: BTC:  [ 0.00  0.00 ]
+Key: BTR:  [ 0.00  0.00 ]
+Key: BTS:  [ 0.00  0.00 ]
+Key: BUNDLE:  [ 0.00  0.00 ]
+Key: BZHI:  [ 0.00  0.00 ]
+Key: CALL:  [ 0.00  0.00 ]
+Key: CALLpcrel:  [ 0.00  0.00 ]
+Key: CATCHRET:  [ 0.00  0.00 ]
+Key: CBW:  [ 0.00  0.00 ]
+Key: CCMP:  [ 0.00  0.00 ]
+Key: CDQ:  [ 0.00  0.00 ]
+Key: CDQE:  [ 0.00  0.00 ]
+Key: CFCMOV:  [ 0.00  0.00 ]
+Key: CFI_INSTRUCTION:  [ 0.00  0.00 ]
+Key: CHS_F:  [ 0.00  0.00 ]
+Key: CHS_Fp:  [ 0.00  0.00 ]
+Key: CLAC:  [ 0.00  0.00 ]
+Key: CLC:  [ 0.00  0.00 ]
+Key: CLD:  [ 0.00  0.00 ]
+Key: CLDEMOTE:  [ 0.00  0.00 ]
+Key: CLEANUPRET:  [ 0.00  0.00 ]
+Key: CLFLUSH:  [ 0.00  0.00 ]
+Key: CLFLUSHOPT:  [ 0.00  0.00 ]
+Key: CLGI:  [ 0.00  0.00 ]
+Key: CLI:  [ 0.00  0.00 ]
+Key: CLRSSBSY:  [ 0.00  0.00 ]
+Key: CLTS:  [ 0.00  0.00 ]
+Key: CLUI:  [ 0.00  0.00 ]
+Key: CLWB:  [ 0.00  0.00 ]
+Key: CLZERO:  [ 0.00  0.00 ]
+Key: CMC:  [ 0.00  0.00 ]
+Key: CMOV:  [ 0.00  0.00 ]
+Key: CMOVBE_F:  [ 0.00  0.00 ]
+Key: CMOVBE_Fp:  [ 0.00  0.00 ]
+Key: CMOVB_F:  [ 0.00  0.00 ]
+Key: CMOVB_Fp:  [ 0.00  0.00 ]
+Key: CMOVE_F:  [ 0.00  0.00 ]
+Key: CMOVE_Fp:  [ 0.00  0.00 ]
+Key: CMOVNBE_F:  [ 0.00  0.00 ]
+Key: CMOVNBE_Fp:  [ 0.00  0.00 ]
+Key: CMOVNB_F:  [ 0.00  0.00 ]
+Key: CMOVNB_Fp:  [ 0.00  0.00 ]
+Key: CMOVNE_F:  [ 0.00  0.00 ]
+Key: CMOVNE_Fp:  [ 0.00  0.00 ]
+Key: CMOVNP_F:  [ 0.00  0.00 ]
+Key: CMOVNP_Fp:  [ 0.00  0.00 ]
+Key: CMOVP_F:  [ 0.00  0.00 ]
+Key: CMOVP_Fp:  [ 0.00  0.00 ]
+Key: CMOV_FR:  [ 0.00  0.00 ]
+Key: CMOV_GR:  [ 0.00  0.00 ]
+Key: CMOV_RFP:  [ 0.00  0.00 ]
+Key: CMOV_VK:  [ 0.00  0.00 ]
+Key: CMOV_VR:  [ 0.00  0.00 ]
+Key: CMP:  [ 0.00  0.00 ]
+Key: CMPCCXADDmr:  [ 0.00  0.00 ]
+Key: CMPPDrmi:  [ 0.00  0.00 ]
+Key: CMPPDrri:  [ 0.00  0.00 ]
+Key: CMPPSrmi:  [ 0.00  0.00 ]
+Key: CMPPSrri:  [ 0.00  0.00 ]
+Key: CMPSB:  [ 0.00  0.00 ]
+Key: CMPSDrmi:  [ 0.00  0.00 ]
+Key: CMPSDrmi_Int:  [ 0.00  0.00 ]
+Key: CMPSDrri:  [ 0.00  0.00 ]
+Key: CMPSDrri_Int:  [ 0.00  0.00 ]
+Key: CMPSL:  [ 0.00  0.00 ]
+Key: CMPSQ:  [ 0.00  0.00 ]
+Key: CMPSSrmi:  [ 0.00  0.00 ]
+Key: CMPSSrmi_Int:  [ 0.00  0.00 ]
+Key: CMPSSrri:  [ 0.00  0.00 ]
+Key: CMPSSrri_Int:  [ 0.00  0.00 ]
+Key: CMPSW:  [ 0.00  0.00 ]
+Key: CMPXCHG:  [ 0.00  0.00 ]
+Key: COMISDrm:  [ 0.00  0.00 ]
+Key: COMISDrm_Int:  [ 0.00  0.00 ]
+Key: COMISDrr:  [ 0.00  0.00 ]
+Key: COMISDrr_Int:  [ 0.00  0.00 ]
+Key: COMISSrm:  [ 0.00  0.00 ]
+Key: COMISSrm_Int:  [ 0.00  0.00 ]
+Key: COMISSrr:  [ 0.00  0.00 ]
+Key: COMISSrr_Int:  [ 0.00  0.00 ]
+Key: COMP_FST:  [ 0.00  0.00 ]
+Key: COM_FIPr:  [ 0.00  0.00 ]
+Key: COM_FIr:  [ 0.00  0.00 ]
+Key: COM_FST:  [ 0.00  0.00 ]
+Key: COM_FpIr:  [ 0.00  0.00 ]
+Key: COM_Fpr:  [ 0.00  0.00 ]
+Key: CONVERGENCECTRL_ANCHOR:  [ 0.00  0.00 ]
+Key: CONVERGENCECTRL_ENTRY:  [ 0.00  0.00 ]
+Key: CONVERGENCECTRL_GLUE:  [ 0.00  0.00 ]
+Key: CONVERGENCECTRL_LOOP:  [ 0.00  0.00 ]
+Key: COPY:  [ 0.00  0.00 ]
+Key: COPY_TO_REGCLASS:  [ 0.00  0.00 ]
+Key: CPUID:  [ 0.00  0.00 ]
+Key: CQO:  [ 0.00  0.00 ]
+Key: CRC:  [ 0.00  0.00 ]
+Key: CS_PREFIX:  [ 0.00  0.00 ]
+Key: CTEST:  [ 0.00  0.00 ]
+Key: CVTDQ:  [ 0.00  0.00 ]
+Key: CVTPD:  [ 0.00  0.00 ]
+Key: CVTPS:  [ 0.00  0.00 ]
+Key: CVTSD:  [ 0.00  0.00 ]
+Key: CVTSI:  [ 0.00  0.00 ]
+Key: CVTSS:  [ 0.00  0.00 ]
+Key: CVTTPD:  [ 0.00  0.00 ]
+Key: CVTTPS:  [ 0.00  0.00 ]
+Key: CVTTSD:  [ 0.00  0.00 ]
+Key: CVTTSS:  [ 0.00  0.00 ]
+Key: CWD:  [ 0.00  0.00 ]
+Key: CWDE:  [ 0.00  0.00 ]
+Key: DAA:  [ 0.00  0.00 ]
+Key: DAS:  [ 0.00  0.00 ]
+Key: DATA:  [ 0.00  0.00 ]
+Key: DBG_INSTR_REF:  [ 0.00  0.00 ]
+Key: DBG_LABEL:  [ 0.00  0.00 ]
+Key: DBG_PHI:  [ 0.00  0.00 ]
+Key: DBG_VALUE:  [ 0.00  0.00 ]
+Key: DBG_VALUE_LIST:  [ 0.00  0.00 ]
+Key: DEC:  [ 0.00  0.00 ]
+Key: DIV:  [ 0.00  0.00 ]
+Key: DIVPDrm:  [ 0.00  0.00 ]
+Key: DIVPDrr:  [ 0.00  0.00 ]
+Key: DIVPSrm:  [ 0.00  0.00 ]
+Key: DIVPSrr:  [ 0.00  0.00 ]
+Key: DIVR_F:  [ 0.00  0.00 ]
+Key: DIVR_FI:  [ 0.00  0.00 ]
+Key: DIVR_FPrST:  [ 0.00  0.00 ]
+Key: DIVR_FST:  [ 0.00  0.00 ]
+Key: DIVR_Fp:  [ 0.00  0.00 ]
+Key: DIVR_FpI:  [ 0.00  0.00 ]
+Key: DIVR_FrST:  [ 0.00  0.00 ]
+Key: DIVSDrm:  [ 0.00  0.00 ]
+Key: DIVSDrm_Int:  [ 0.00  0.00 ]
+Key: DIVSDrr:  [ 0.00  0.00 ]
+Key: DIVSDrr_Int:  [ 0.00  0.00 ]
+Key: DIVSSrm:  [ 0.00  0.00 ]
+Key: DIVSSrm_Int:  [ 0.00  0.00 ]
+Key: DIVSSrr:  [ 0.00  0.00 ]
+Key: DIVSSrr_Int:  [ 0.00  0.00 ]
+Key: DIV_F:  [ 0.00  0.00 ]
+Key: DIV_FI:  [ 0.00  0.00 ]
+Key: DIV_FPrST:  [ 0.00  0.00 ]
+Key: DIV_FST:  [ 0.00  0.00 ]
+Key: DIV_Fp:  [ 0.00  0.00 ]
+Key: DIV_FpI:  [ 0.00  0.00 ]
+Key: DIV_FrST:  [ 0.00  0.00 ]
+Key: DPPDrmi:  [ 0.00  0.00 ]
+Key: DPPDrri:  [ 0.00  0.00 ]
+Key: DPPSrmi:  [ 0.00  0.00 ]
+Key: DPPSrri:  [ 0.00  0.00 ]
+Key: DS_PREFIX:  [ 0.00  0.00 ]
+Key: DYN_ALLOCA:  [ 0.00  0.00 ]
+Key: EH_LABEL:  [ 0.00  0.00 ]
+Key: EH_RETURN:  [ 0.00  0.00 ]
+Key: EH_SjLj_LongJmp:  [ 0.00  0.00 ]
+Key: EH_SjLj_SetJmp:  [ 0.00  0.00 ]
+Key: EH_SjLj_Setup:  [ 0.00  0.00 ]
+Key: ENCLS:  [ 0.00  0.00 ]
+Key: ENCLU:  [ 0.00  0.00 ]
+Key: ENCLV:  [ 0.00  0.00 ]
+Key: ENCODEKEY:  [ 0.00  0.00 ]
+Key: ENDBR:  [ 0.00  0.00 ]
+Key: ENQCMD:  [ 0.00  0.00 ]
+Key: ENQCMDS:  [ 0.00  0.00 ]
+Key: ENTER:  [ 0.00  0.00 ]
+Key: ERETS:  [ 0.00  0.00 ]
+Key: ERETU:  [ 0.00  0.00 ]
+Key: ES_PREFIX:  [ 0.00  0.00 ]
+Key: EXTRACTPSmri:  [ 0.00  0.00 ]
+Key: EXTRACTPSrri:  [ 0.00  0.00 ]
+Key: EXTRACT_SUBREG:  [ 0.00  0.00 ]
+Key: EXTRQ:  [ 0.00  0.00 ]
+Key: EXTRQI:  [ 0.00  0.00 ]
+Key: F:  [ 0.00  0.00 ]
+Key: FAKE_USE:  [ 0.00  0.00 ]
+Key: FARCALL:  [ 0.00  0.00 ]
+Key: FARJMP:  [ 0.00  0.00 ]
+Key: FAULTING_OP:  [ 0.00  0.00 ]
+Key: FBLDm:  [ 0.00  0.00 ]
+Key: FBSTPm:  [ 0.00  0.00 ]
+Key: FCOM:  [ 0.00  0.00 ]
+Key: FCOMP:  [ 0.00  0.00 ]
+Key: FCOMPP:  [ 0.00  0.00 ]
+Key: FCOS:  [ 0.00  0.00 ]
+Key: FDECSTP:  [ 0.00  0.00 ]
+Key: FEMMS:  [ 0.00  0.00 ]
+Key: FENTRY_CALL:  [ 0.00  0.00 ]
+Key: FFREE:  [ 0.00  0.00 ]
+Key: FFREEP:  [ 0.00  0.00 ]
+Key: FICOM:  [ 0.00  0.00 ]
+Key: FICOMP:  [ 0.00  0.00 ]
+Key: FINCSTP:  [ 0.00  0.00 ]
+Key: FLDCW:  [ 0.00  0.00 ]
+Key: FLDENVm:  [ 0.00  0.00 ]
+Key: FLDL:  [ 0.00  0.00 ]
+Key: FLDLG:  [ 0.00  0.00 ]
+Key: FLDLN:  [ 0.00  0.00 ]
+Key: FLDPI:  [ 0.00  0.00 ]
+Key: FNCLEX:  [ 0.00  0.00 ]
+Key: FNINIT:  [ 0.00  0.00 ]
+Key: FNOP:  [ 0.00  0.00 ]
+Key: FNSTCW:  [ 0.00  0.00 ]
+Key: FNSTSW:  [ 0.00  0.00 ]
+Key: FNSTSWm:  [ 0.00  0.00 ]
+Key: FP:  [ 0.00  0.00 ]
+Key: FPATAN:  [ 0.00  0.00 ]
+Key: FPREM:  [ 0.00  0.00 ]
+Key: FPTAN:  [ 0.00  0.00 ]
+Key: FRNDINT:  [ 0.00  0.00 ]
+Key: FRSTORm:  [ 0.00  0.00 ]
+Key: FSAVEm:  [ 0.00  0.00 ]
+Key: FSCALE:  [ 0.00  0.00 ]
+Key: FSIN:  [ 0.00  0.00 ]
+Key: FSINCOS:  [ 0.00  0.00 ]
+Key: FSTENVm:  [ 0.00  0.00 ]
+Key: FS_PREFIX:  [ 0.00  0.00 ]
+Key: FXRSTOR:  [ 0.00  0.00 ]
+Key: FXSAVE:  [ 0.00  0.00 ]
+Key: FXTRACT:  [ 0.00  0.00 ]
+Key: FYL:  [ 0.00  0.00 ]
+Key: FsFLD:  [ 0.00  0.00 ]
+Key: GC_LABEL:  [ 0.00  0.00 ]
+Key: GETSEC:  [ 0.00  0.00 ]
+Key: GF:  [ 0.00  0.00 ]
+Key: GS_PREFIX:  [ 0.00  0.00 ]
+Key: G_ABDS:  [ 0.00  0.00 ]
+Key: G_ABDU:  [ 0.00  0.00 ]
+Key: G_ABS:  [ 0.00  0.00 ]
+Key: G_ADD:  [ 0.00  0.00 ]
+Key: G_ADDRSPACE_CAST:  [ 0.00  0.00 ]
+Key: G_AND:  [ 0.00  0.00 ]
+Key: G_ANYEXT:  [ 0.00  0.00 ]
+Key: G_ASHR:  [ 0.00  0.00 ]
+Key: G_ASSERT_ALIGN:  [ 0.00  0.00 ]
+Key: G_ASSERT_SEXT:  [ 0.00  0.00 ]
+Key: G_ASSERT_ZEXT:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_ADD:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_AND:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_FADD:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_FMAX:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_FMAXIMUM:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_FMIN:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_FMINIMUM:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_FSUB:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_MAX:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_MIN:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_NAND:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_OR:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_SUB:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_UDEC_WRAP:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_UINC_WRAP:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_UMAX:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_UMIN:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_USUB_COND:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_USUB_SAT:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_XCHG:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_XOR:  [ 0.00  0.00 ]
+Key: G_ATOMIC_CMPXCHG:  [ 0.00  0.00 ]
+Key: G_ATOMIC_CMPXCHG_WITH_SUCCESS:  [ 0.00  0.00 ]
+Key: G_BITCAST:  [ 0.00  0.00 ]
+Key: G_BITREVERSE:  [ 0.00  0.00 ]
+Key: G_BLOCK_ADDR:  [ 0.00  0.00 ]
+Key: G_BR:  [ 0.00  0.00 ]
+Key: G_BRCOND:  [ 0.00  0.00 ]
+Key: G_BRINDIRECT:  [ 0.00  0.00 ]
+Key: G_BRJT:  [ 0.00  0.00 ]
+Key: G_BSWAP:  [ 0.00  0.00 ]
+Key: G_BUILD_VECTOR:  [ 0.00  0.00 ]
+Key: G_BUILD_VECTOR_TRUNC:  [ 0.00  0.00 ]
+Key: G_BZERO:  [ 0.00  0.00 ]
+Key: G_CONCAT_VECTORS:  [ 0.00  0.00 ]
+Key: G_CONSTANT:  [ 0.00  0.00 ]
+Key: G_CONSTANT_FOLD_BARRIER:  [ 0.00  0.00 ]
+Key: G_CONSTANT_POOL:  [ 0.00  0.00 ]
+Key: G_CTLZ:  [ 0.00  0.00 ]
+Key: G_CTLZ_ZERO_UNDEF:  [ 0.00  0.00 ]
+Key: G_CTPOP:  [ 0.00  0.00 ]
+Key: G_CTTZ:  [ 0.00  0.00 ]
+Key: G_CTTZ_ZERO_UNDEF:  [ 0.00  0.00 ]
+Key: G_DEBUGTRAP:  [ 0.00  0.00 ]
+Key: G_DYN_STACKALLOC:  [ 0.00  0.00 ]
+Key: G_EXTRACT:  [ 0.00  0.00 ]
+Key: G_EXTRACT_SUBVECTOR:  [ 0.00  0.00 ]
+Key: G_EXTRACT_VECTOR_ELT:  [ 0.00  0.00 ]
+Key: G_FABS:  [ 0.00  0.00 ]
+Key: G_FACOS:  [ 0.00  0.00 ]
+Key: G_FADD:  [ 0.00  0.00 ]
+Key: G_FASIN:  [ 0.00  0.00 ]
+Key: G_FATAN:  [ 0.00  0.00 ]
+Key: G_FCANONICALIZE:  [ 0.00  0.00 ]
+Key: G_FCEIL:  [ 0.00  0.00 ]
+Key: G_FCMP:  [ 0.00  0.00 ]
+Key: G_FCONSTANT:  [ 0.00  0.00 ]
+Key: G_FCOPYSIGN:  [ 0.00  0.00 ]
+Key: G_FCOS:  [ 0.00  0.00 ]
+Key: G_FCOSH:  [ 0.00  0.00 ]
+Key: G_FDIV:  [ 0.00  0.00 ]
+Key: G_FENCE:  [ 0.00  0.00 ]
+Key: G_FEXP:  [ 0.00  0.00 ]
+Key: G_FFLOOR:  [ 0.00  0.00 ]
+Key: G_FFREXP:  [ 0.00  0.00 ]
+Key: G_FILD:  [ 0.00  0.00 ]
+Key: G_FIST:  [ 0.00  0.00 ]
+Key: G_FLDCW:  [ 0.00  0.00 ]
+Key: G_FLDEXP:  [ 0.00  0.00 ]
+Key: G_FLOG:  [ 0.00  0.00 ]
+Key: G_FMA:  [ 0.00  0.00 ]
+Key: G_FMAD:  [ 0.00  0.00 ]
+Key: G_FMAXIMUM:  [ 0.00  0.00 ]
+Key: G_FMAXIMUMNUM:  [ 0.00  0.00 ]
+Key: G_FMAXNUM:  [ 0.00  0.00 ]
+Key: G_FMAXNUM_IEEE:  [ 0.00  0.00 ]
+Key: G_FMINIMUM:  [ 0.00  0.00 ]
+Key: G_FMINIMUMNUM:  [ 0.00  0.00 ]
+Key: G_FMINNUM:  [ 0.00  0.00 ]
+Key: G_FMINNUM_IEEE:  [ 0.00  0.00 ]
+Key: G_FMODF:  [ 0.00  0.00 ]
+Key: G_FMUL:  [ 0.00  0.00 ]
+Key: G_FNEARBYINT:  [ 0.00  0.00 ]
+Key: G_FNEG:  [ 0.00  0.00 ]
+Key: G_FNSTCW:  [ 0.00  0.00 ]
+Key: G_FPEXT:  [ 0.00  0.00 ]
+Key: G_FPOW:  [ 0.00  0.00 ]
+Key: G_FPOWI:  [ 0.00  0.00 ]
+Key: G_FPTOSI:  [ 0.00  0.00 ]
+Key: G_FPTOSI_SAT:  [ 0.00  0.00 ]
+Key: G_FPTOUI:  [ 0.00  0.00 ]
+Key: G_FPTOUI_SAT:  [ 0.00  0.00 ]
+Key: G_FPTRUNC:  [ 0.00  0.00 ]
+Key: G_FRAME_INDEX:  [ 0.00  0.00 ]
+Key: G_FREEZE:  [ 0.00  0.00 ]
+Key: G_FREM:  [ 0.00  0.00 ]
+Key: G_FRINT:  [ 0.00  0.00 ]
+Key: G_FSHL:  [ 0.00  0.00 ]
+Key: G_FSHR:  [ 0.00  0.00 ]
+Key: G_FSIN:  [ 0.00  0.00 ]
+Key: G_FSINCOS:  [ 0.00  0.00 ]
+Key: G_FSINH:  [ 0.00  0.00 ]
+Key: G_FSQRT:  [ 0.00  0.00 ]
+Key: G_FSUB:  [ 0.00  0.00 ]
+Key: G_FTAN:  [ 0.00  0.00 ]
+Key: G_FTANH:  [ 0.00  0.00 ]
+Key: G_GET_FPENV:  [ 0.00  0.00 ]
+Key: G_GET_FPMODE:  [ 0.00  0.00 ]
+Key: G_GET_ROUNDING:  [ 0.00  0.00 ]
+Key: G_GLOBAL_VALUE:  [ 0.00  0.00 ]
+Key: G_ICMP:  [ 0.00  0.00 ]
+Key: G_IMPLICIT_DEF:  [ 0.00  0.00 ]
+Key: G_INDEXED_LOAD:  [ 0.00  0.00 ]
+Key: G_INDEXED_SEXTLOAD:  [ 0.00  0.00 ]
+Key: G_INDEXED_STORE:  [ 0.00  0.00 ]
+Key: G_INDEXED_ZEXTLOAD:  [ 0.00  0.00 ]
+Key: G_INSERT:  [ 0.00  0.00 ]
+Key: G_INSERT_SUBVECTOR:  [ 0.00  0.00 ]
+Key: G_INSERT_VECTOR_ELT:  [ 0.00  0.00 ]
+Key: G_INTRINSIC:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_CONVERGENT:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_FPTRUNC_ROUND:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_LLRINT:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_LRINT:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_ROUND:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_ROUNDEVEN:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_TRUNC:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_W_SIDE_EFFECTS:  [ 0.00  0.00 ]
+Key: G_INTTOPTR:  [ 0.00  0.00 ]
+Key: G_INVOKE_REGION_START:  [ 0.00  0.00 ]
+Key: G_IS_FPCLASS:  [ 0.00  0.00 ]
+Key: G_JUMP_TABLE:  [ 0.00  0.00 ]
+Key: G_LLROUND:  [ 0.00  0.00 ]
+Key: G_LOAD:  [ 0.00  0.00 ]
+Key: G_LROUND:  [ 0.00  0.00 ]
+Key: G_LSHR:  [ 0.00  0.00 ]
+Key: G_MEMCPY:  [ 0.00  0.00 ]
+Key: G_MEMCPY_INLINE:  [ 0.00  0.00 ]
+Key: G_MEMMOVE:  [ 0.00  0.00 ]
+Key: G_MEMSET:  [ 0.00  0.00 ]
+Key: G_MERGE_VALUES:  [ 0.00  0.00 ]
+Key: G_MUL:  [ 0.00  0.00 ]
+Key: G_OR:  [ 0.00  0.00 ]
+Key: G_PHI:  [ 0.00  0.00 ]
+Key: G_PREFETCH:  [ 0.00  0.00 ]
+Key: G_PTRAUTH_GLOBAL_VALUE:  [ 0.00  0.00 ]
+Key: G_PTRMASK:  [ 0.00  0.00 ]
+Key: G_PTRTOINT:  [ 0.00  0.00 ]
+Key: G_PTR_ADD:  [ 0.00  0.00 ]
+Key: G_READCYCLECOUNTER:  [ 0.00  0.00 ]
+Key: G_READSTEADYCOUNTER:  [ 0.00  0.00 ]
+Key: G_READ_REGISTER:  [ 0.00  0.00 ]
+Key: G_RESET_FPENV:  [ 0.00  0.00 ]
+Key: G_RESET_FPMODE:  [ 0.00  0.00 ]
+Key: G_ROTL:  [ 0.00  0.00 ]
+Key: G_ROTR:  [ 0.00  0.00 ]
+Key: G_SADDE:  [ 0.00  0.00 ]
+Key: G_SADDO:  [ 0.00  0.00 ]
+Key: G_SADDSAT:  [ 0.00  0.00 ]
+Key: G_SBFX:  [ 0.00  0.00 ]
+Key: G_SCMP:  [ 0.00  0.00 ]
+Key: G_SDIV:  [ 0.00  0.00 ]
+Key: G_SDIVFIX:  [ 0.00  0.00 ]
+Key: G_SDIVFIXSAT:  [ 0.00  0.00 ]
+Key: G_SDIVREM:  [ 0.00  0.00 ]
+Key: G_SELECT:  [ 0.00  0.00 ]
+Key: G_SET_FPENV:  [ 0.00  0.00 ]
+Key: G_SET_FPMODE:  [ 0.00  0.00 ]
+Key: G_SET_ROUNDING:  [ 0.00  0.00 ]
+Key: G_SEXT:  [ 0.00  0.00 ]
+Key: G_SEXTLOAD:  [ 0.00  0.00 ]
+Key: G_SEXT_INREG:  [ 0.00  0.00 ]
+Key: G_SHL:  [ 0.00  0.00 ]
+Key: G_SHUFFLE_VECTOR:  [ 0.00  0.00 ]
+Key: G_SITOFP:  [ 0.00  0.00 ]
+Key: G_SMAX:  [ 0.00  0.00 ]
+Key: G_SMIN:  [ 0.00  0.00 ]
+Key: G_SMULFIX:  [ 0.00  0.00 ]
+Key: G_SMULFIXSAT:  [ 0.00  0.00 ]
+Key: G_SMULH:  [ 0.00  0.00 ]
+Key: G_SMULO:  [ 0.00  0.00 ]
+Key: G_SPLAT_VECTOR:  [ 0.00  0.00 ]
+Key: G_SREM:  [ 0.00  0.00 ]
+Key: G_SSHLSAT:  [ 0.00  0.00 ]
+Key: G_SSUBE:  [ 0.00  0.00 ]
+Key: G_SSUBO:  [ 0.00  0.00 ]
+Key: G_SSUBSAT:  [ 0.00  0.00 ]
+Key: G_STACKRESTORE:  [ 0.00  0.00 ]
+Key: G_STACKSAVE:  [ 0.00  0.00 ]
+Key: G_STEP_VECTOR:  [ 0.00  0.00 ]
+Key: G_STORE:  [ 0.00  0.00 ]
+Key: G_STRICT_FADD:  [ 0.00  0.00 ]
+Key: G_STRICT_FDIV:  [ 0.00  0.00 ]
+Key: G_STRICT_FLDEXP:  [ 0.00  0.00 ]
+Key: G_STRICT_FMA:  [ 0.00  0.00 ]
+Key: G_STRICT_FMUL:  [ 0.00  0.00 ]
+Key: G_STRICT_FREM:  [ 0.00  0.00 ]
+Key: G_STRICT_FSQRT:  [ 0.00  0.00 ]
+Key: G_STRICT_FSUB:  [ 0.00  0.00 ]
+Key: G_SUB:  [ 0.00  0.00 ]
+Key: G_TRAP:  [ 0.00  0.00 ]
+Key: G_TRUNC:  [ 0.00  0.00 ]
+Key: G_TRUNC_SSAT_S:  [ 0.00  0.00 ]
+Key: G_TRUNC_SSAT_U:  [ 0.00  0.00 ]
+Key: G_TRUNC_USAT_U:  [ 0.00  0.00 ]
+Key: G_UADDE:  [ 0.00  0.00 ]
+Key: G_UADDO:  [ 0.00  0.00 ]
+Key: G_UADDSAT:  [ 0.00  0.00 ]
+Key: G_UBFX:  [ 0.00  0.00 ]
+Key: G_UBSANTRAP:  [ 0.00  0.00 ]
+Key: G_UCMP:  [ 0.00  0.00 ]
+Key: G_UDIV:  [ 0.00  0.00 ]
+Key: G_UDIVFIX:  [ 0.00  0.00 ]
+Key: G_UDIVFIXSAT:  [ 0.00  0.00 ]
+Key: G_UDIVREM:  [ 0.00  0.00 ]
+Key: G_UITOFP:  [ 0.00  0.00 ]
+Key: G_UMAX:  [ 0.00  0.00 ]
+Key: G_UMIN:  [ 0.00  0.00 ]
+Key: G_UMULFIX:  [ 0.00  0.00 ]
+Key: G_UMULFIXSAT:  [ 0.00  0.00 ]
+Key: G_UMULH:  [ 0.00  0.00 ]
+Key: G_UMULO:  [ 0.00  0.00 ]
+Key: G_UNMERGE_VALUES:  [ 0.00  0.00 ]
+Key: G_UREM:  [ 0.00  0.00 ]
+Key: G_USHLSAT:  [ 0.00  0.00 ]
+Key: G_USUBE:  [ 0.00  0.00 ]
+Key: G_USUBO:  [ 0.00  0.00 ]
+Key: G_USUBSAT:  [ 0.00  0.00 ]
+Key: G_VAARG:  [ 0.00  0.00 ]
+Key: G_VASTART:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_ADD:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_AND:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_FADD:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_FMAX:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_FMAXIMUM:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_FMIN:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_FMINIMUM:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_FMUL:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_MUL:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_OR:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_SEQ_FADD:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_SEQ_FMUL:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_SMAX:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_SMIN:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_UMAX:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_UMIN:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_XOR:  [ 0.00  0.00 ]
+Key: G_VECTOR_COMPRESS:  [ 0.00  0.00 ]
+Key: G_VSCALE:  [ 0.00  0.00 ]
+Key: G_WRITE_REGISTER:  [ 0.00  0.00 ]
+Key: G_XOR:  [ 0.00  0.00 ]
+Key: G_ZEXT:  [ 0.00  0.00 ]
+Key: G_ZEXTLOAD:  [ 0.00  0.00 ]
+Key: HADDPDrm:  [ 0.00  0.00 ]
+Key: HADDPDrr:  [ 0.00  0.00 ]
+Key: HADDPSrm:  [ 0.00  0.00 ]
+Key: HADDPSrr:  [ 0.00  0.00 ]
+Key: HLT:  [ 0.00  0.00 ]
+Key: HRESET:  [ 0.00  0.00 ]
+Key: HSUBPDrm:  [ 0.00  0.00 ]
+Key: HSUBPDrr:  [ 0.00  0.00 ]
+Key: HSUBPSrm:  [ 0.00  0.00 ]
+Key: HSUBPSrr:  [ 0.00  0.00 ]
+Key: ICALL_BRANCH_FUNNEL:  [ 0.00  0.00 ]
+Key: IDIV:  [ 0.00  0.00 ]
+Key: ILD_F:  [ 0.00  0.00 ]
+Key: ILD_Fp:  [ 0.00  0.00 ]
+Key: IMPLICIT_DEF:  [ 0.00  0.00 ]
+Key: IMUL:  [ 0.00  0.00 ]
+Key: IMULZU:  [ 0.00  0.00 ]
+Key: IN:  [ 0.00  0.00 ]
+Key: INC:  [ 0.00  0.00 ]
+Key: INCSSPD:  [ 0.00  0.00 ]
+Key: INCSSPQ:  [ 0.00  0.00 ]
+Key: INDIRECT_THUNK_CALL:  [ 0.00  0.00 ]
+Key: INDIRECT_THUNK_TCRETURN:  [ 0.00  0.00 ]
+Key: INIT_UNDEF:  [ 0.00  0.00 ]
+Key: INLINEASM:  [ 0.00  0.00 ]
+Key: INLINEASM_BR:  [ 0.00  0.00 ]
+Key: INSB:  [ 0.00  0.00 ]
+Key: INSERTPSrmi:  [ 0.00  0.00 ]
+Key: INSERTPSrri:  [ 0.00  0.00 ]
+Key: INSERTQ:  [ 0.00  0.00 ]
+Key: INSERTQI:  [ 0.00  0.00 ]
+Key: INSERT_SUBREG:  [ 0.00  0.00 ]
+Key: INSL:  [ 0.00  0.00 ]
+Key: INSW:  [ 0.00  0.00 ]
+Key: INT:  [ 0.00  0.00 ]
+Key: INTO:  [ 0.00  0.00 ]
+Key: INVD:  [ 0.00  0.00 ]
+Key: INVEPT:  [ 0.00  0.00 ]
+Key: INVLPG:  [ 0.00  0.00 ]
+Key: INVLPGA:  [ 0.00  0.00 ]
+Key: INVLPGB:  [ 0.00  0.00 ]
+Key: INVPCID:  [ 0.00  0.00 ]
+Key: INVVPID:  [ 0.00  0.00 ]
+Key: IRET:  [ 0.00  0.00 ]
+Key: ISTT_FP:  [ 0.00  0.00 ]
+Key: ISTT_Fp:  [ 0.00  0.00 ]
+Key: IST_F:  [ 0.00  0.00 ]
+Key: IST_FP:  [ 0.00  0.00 ]
+Key: IST_Fp:  [ 0.00  0.00 ]
+Key: Int_eh_sjlj_setup_dispatch:  [ 0.00  0.00 ]
+Key: JCC:  [ 0.00  0.00 ]
+Key: JCXZ:  [ 0.00  0.00 ]
+Key: JECXZ:  [ 0.00  0.00 ]
+Key: JMP:  [ 0.00  0.00 ]
+Key: JMPABS:  [ 0.00  0.00 ]
+Key: JRCXZ:  [ 0.00  0.00 ]
+Key: JUMP_TABLE_DEBUG_INFO:  [ 0.00  0.00 ]
+Key: KADDBkk:  [ 0.00  0.00 ]
+Key: KADDDkk:  [ 0.00  0.00 ]
+Key: KADDQkk:  [ 0.00  0.00 ]
+Key: KADDWkk:  [ 0.00  0.00 ]
+Key: KANDBkk:  [ 0.00  0.00 ]
+Key: KANDDkk:  [ 0.00  0.00 ]
+Key: KANDNBkk:  [ 0.00  0.00 ]
+Key: KANDNDkk:  [ 0.00  0.00 ]
+Key: KANDNQkk:  [ 0.00  0.00 ]
+Key: KANDNWkk:  [ 0.00  0.00 ]
+Key: KANDQkk:  [ 0.00  0.00 ]
+Key: KANDWkk:  [ 0.00  0.00 ]
+Key: KCFI_CHECK:  [ 0.00  0.00 ]
+Key: KILL:  [ 0.00  0.00 ]
+Key: KMOVBkk:  [ 0.00  0.00 ]
+Key: KMOVBkk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVBkm:  [ 0.00  0.00 ]
+Key: KMOVBkm_EVEX:  [ 0.00  0.00 ]
+Key: KMOVBkr:  [ 0.00  0.00 ]
+Key: KMOVBkr_EVEX:  [ 0.00  0.00 ]
+Key: KMOVBmk:  [ 0.00  0.00 ]
+Key: KMOVBmk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVBrk:  [ 0.00  0.00 ]
+Key: KMOVBrk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVDkk:  [ 0.00  0.00 ]
+Key: KMOVDkk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVDkm:  [ 0.00  0.00 ]
+Key: KMOVDkm_EVEX:  [ 0.00  0.00 ]
+Key: KMOVDkr:  [ 0.00  0.00 ]
+Key: KMOVDkr_EVEX:  [ 0.00  0.00 ]
+Key: KMOVDmk:  [ 0.00  0.00 ]
+Key: KMOVDmk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVDrk:  [ 0.00  0.00 ]
+Key: KMOVDrk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVQkk:  [ 0.00  0.00 ]
+Key: KMOVQkk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVQkm:  [ 0.00  0.00 ]
+Key: KMOVQkm_EVEX:  [ 0.00  0.00 ]
+Key: KMOVQkr:  [ 0.00  0.00 ]
+Key: KMOVQkr_EVEX:  [ 0.00  0.00 ]
+Key: KMOVQmk:  [ 0.00  0.00 ]
+Key: KMOVQmk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVQrk:  [ 0.00  0.00 ]
+Key: KMOVQrk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVWkk:  [ 0.00  0.00 ]
+Key: KMOVWkk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVWkm:  [ 0.00  0.00 ]
+Key: KMOVWkm_EVEX:  [ 0.00  0.00 ]
+Key: KMOVWkr:  [ 0.00  0.00 ]
+Key: KMOVWkr_EVEX:  [ 0.00  0.00 ]
+Key: KMOVWmk:  [ 0.00  0.00 ]
+Key: KMOVWmk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVWrk:  [ 0.00  0.00 ]
+Key: KMOVWrk_EVEX:  [ 0.00  0.00 ]
+Key: KNOTBkk:  [ 0.00  0.00 ]
+Key: KNOTDkk:  [ 0.00  0.00 ]
+Key: KNOTQkk:  [ 0.00  0.00 ]
+Key: KNOTWkk:  [ 0.00  0.00 ]
+Key: KORBkk:  [ 0.00  0.00 ]
+Key: KORDkk:  [ 0.00  0.00 ]
+Key: KORQkk:  [ 0.00  0.00 ]
+Key: KORTESTBkk:  [ 0.00  0.00 ]
+Key: KORTESTDkk:  [ 0.00  0.00 ]
+Key: KORTESTQkk:  [ 0.00  0.00 ]
+Key: KORTESTWkk:  [ 0.00  0.00 ]
+Key: KORWkk:  [ 0.00  0.00 ]
+Key: KSET:  [ 0.00  0.00 ]
+Key: KSHIFTLBki:  [ 0.00  0.00 ]
+Key: KSHIFTLDki:  [ 0.00  0.00 ]
+Key: KSHIFTLQki:  [ 0.00  0.00 ]
+Key: KSHIFTLWki:  [ 0.00  0.00 ]
+Key: KSHIFTRBki:  [ 0.00  0.00 ]
+Key: KSHIFTRDki:  [ 0.00  0.00 ]
+Key: KSHIFTRQki:  [ 0.00  0.00 ]
+Key: KSHIFTRWki:  [ 0.00  0.00 ]
+Key: KTESTBkk:  [ 0.00  0.00 ]
+Key: KTESTDkk:  [ 0.00  0.00 ]
+Key: KTESTQkk:  [ 0.00  0.00 ]
+Key: KTESTWkk:  [ 0.00  0.00 ]
+Key: KUNPCKBWkk:  [ 0.00  0.00 ]
+Key: KUNPCKDQkk:  [ 0.00  0.00 ]
+Key: KUNPCKWDkk:  [ 0.00  0.00 ]
+Key: KXNORBkk:  [ 0.00  0.00 ]
+Key: KXNORDkk:  [ 0.00  0.00 ]
+Key: KXNORQkk:  [ 0.00  0.00 ]
+Key: KXNORWkk:  [ 0.00  0.00 ]
+Key: KXORBkk:  [ 0.00  0.00 ]
+Key: KXORDkk:  [ 0.00  0.00 ]
+Key: KXORQkk:  [ 0.00  0.00 ]
+Key: KXORWkk:  [ 0.00  0.00 ]
+Key: LAHF:  [ 0.00  0.00 ]
+Key: LAR:  [ 0.00  0.00 ]
+Key: LCMPXCHG:  [ 0.00  0.00 ]
+Key: LDDQUrm:  [ 0.00  0.00 ]
+Key: LDMXCSR:  [ 0.00  0.00 ]
+Key: LDS:  [ 0.00  0.00 ]
+Key: LDTILECFG:  [ 0.00  0.00 ]
+Key: LDTILECFG_EVEX:  [ 0.00  0.00 ]
+Key: LD_F:  [ 0.00  0.00 ]
+Key: LD_Fp:  [ 0.00  0.00 ]
+Key: LD_Frr:  [ 0.00  0.00 ]
+Key: LEA:  [ 0.00  0.00 ]
+Key: LEAVE:  [ 0.00  0.00 ]
+Key: LES:  [ 0.00  0.00 ]
+Key: LFENCE:  [ 0.00  0.00 ]
+Key: LFS:  [ 0.00  0.00 ]
+Key: LGDT:  [ 0.00  0.00 ]
+Key: LGS:  [ 0.00  0.00 ]
+Key: LIDT:  [ 0.00  0.00 ]
+Key: LIFETIME_END:  [ 0.00  0.00 ]
+Key: LIFETIME_START:  [ 0.00  0.00 ]
+Key: LKGS:  [ 0.00  0.00 ]
+Key: LLDT:  [ 0.00  0.00 ]
+Key: LLWPCB:  [ 0.00  0.00 ]
+Key: LMSW:  [ 0.00  0.00 ]
+Key: LOADIWKEY:  [ 0.00  0.00 ]
+Key: LOAD_STACK_GUARD:  [ 0.00  0.00 ]
+Key: LOCAL_ESCAPE:  [ 0.00  0.00 ]
+Key: LOCK_ADD:  [ 0.00  0.00 ]
+Key: LOCK_AND:  [ 0.00  0.00 ]
+Key: LOCK_BTC:  [ 0.00  0.00 ]
+Key: LOCK_BTC_RM:  [ 0.00  0.00 ]
+Key: LOCK_BTR:  [ 0.00  0.00 ]
+Key: LOCK_BTR_RM:  [ 0.00  0.00 ]
+Key: LOCK_BTS:  [ 0.00  0.00 ]
+Key: LOCK_BTS_RM:  [ 0.00  0.00 ]
+Key: LOCK_DEC:  [ 0.00  0.00 ]
+Key: LOCK_INC:  [ 0.00  0.00 ]
+Key: LOCK_OR:  [ 0.00  0.00 ]
+Key: LOCK_PREFIX:  [ 0.00  0.00 ]
+Key: LOCK_SUB:  [ 0.00  0.00 ]
+Key: LOCK_XOR:  [ 0.00  0.00 ]
+Key: LODSB:  [ 0.00  0.00 ]
+Key: LODSL:  [ 0.00  0.00 ]
+Key: LODSQ:  [ 0.00  0.00 ]
+Key: LODSW:  [ 0.00  0.00 ]
+Key: LOOP:  [ 0.00  0.00 ]
+Key: LOOPE:  [ 0.00  0.00 ]
+Key: LOOPNE:  [ 0.00  0.00 ]
+Key: LRET:  [ 0.00  0.00 ]
+Key: LRETI:  [ 0.00  0.00 ]
+Key: LSL:  [ 0.00  0.00 ]
+Key: LSS:  [ 0.00  0.00 ]
+Key: LTRm:  [ 0.00  0.00 ]
+Key: LTRr:  [ 0.00  0.00 ]
+Key: LWPINS:  [ 0.00  0.00 ]
+Key: LWPVAL:  [ 0.00  0.00 ]
+Key: LXADD:  [ 0.00  0.00 ]
+Key: LZCNT:  [ 0.00  0.00 ]
+Key: MASKMOVDQU:  [ 0.00  0.00 ]
+Key: MASKPAIR:  [ 0.00  0.00 ]
+Key: MAXCPDrm:  [ 0.00  0.00 ]
+Key: MAXCPDrr:  [ 0.00  0.00 ]
+Key: MAXCPSrm:  [ 0.00  0.00 ]
+Key: MAXCPSrr:  [ 0.00  0.00 ]
+Key: MAXCSDrm:  [ 0.00  0.00 ]
+Key: MAXCSDrr:  [ 0.00  0.00 ]
+Key: MAXCSSrm:  [ 0.00  0.00 ]
+Key: MAXCSSrr:  [ 0.00  0.00 ]
+Key: MAXPDrm:  [ 0.00  0.00 ]
+Key: MAXPDrr:  [ 0.00  0.00 ]
+Key: MAXPSrm:  [ 0.00  0.00 ]
+Key: MAXPSrr:  [ 0.00  0.00 ]
+Key: MAXSDrm:  [ 0.00  0.00 ]
+Key: MAXSDrm_Int:  [ 0.00  0.00 ]
+Key: MAXSDrr:  [ 0.00  0.00 ]
+Key: MAXSDrr_Int:  [ 0.00  0.00 ]
+Key: MAXSSrm:  [ 0.00  0.00 ]
+Key: MAXSSrm_Int:  [ 0.00  0.00 ]
+Key: MAXSSrr:  [ 0.00  0.00 ]
+Key: MAXSSrr_Int:  [ 0.00  0.00 ]
+Key: MEMBARRIER:  [ 0.00  0.00 ]
+Key: MFENCE:  [ 0.00  0.00 ]
+Key: MINCPDrm:  [ 0.00  0.00 ]
+Key: MINCPDrr:  [ 0.00  0.00 ]
+Key: MINCPSrm:  [ 0.00  0.00 ]
+Key: MINCPSrr:  [ 0.00  0.00 ]
+Key: MINCSDrm:  [ 0.00  0.00 ]
+Key: MINCSDrr:  [ 0.00  0.00 ]
+Key: MINCSSrm:  [ 0.00  0.00 ]
+Key: MINCSSrr:  [ 0.00  0.00 ]
+Key: MINPDrm:  [ 0.00  0.00 ]
+Key: MINPDrr:  [ 0.00  0.00 ]
+Key: MINPSrm:  [ 0.00  0.00 ]
+Key: MINPSrr:  [ 0.00  0.00 ]
+Key: MINSDrm:  [ 0.00  0.00 ]
+Key: MINSDrm_Int:  [ 0.00  0.00 ]
+Key: MINSDrr:  [ 0.00  0.00 ]
+Key: MINSDrr_Int:  [ 0.00  0.00 ]
+Key: MINSSrm:  [ 0.00  0.00 ]
+Key: MINSSrm_Int:  [ 0.00  0.00 ]
+Key: MINSSrr:  [ 0.00  0.00 ]
+Key: MINSSrr_Int:  [ 0.00  0.00 ]
+Key: MMX_CVTPD:  [ 0.00  0.00 ]
+Key: MMX_CVTPI:  [ 0.00  0.00 ]
+Key: MMX_CVTPS:  [ 0.00  0.00 ]
+Key: MMX_CVTTPD:  [ 0.00  0.00 ]
+Key: MMX_CVTTPS:  [ 0.00  0.00 ]
+Key: MMX_EMMS:  [ 0.00  0.00 ]
+Key: MMX_MASKMOVQ:  [ 0.00  0.00 ]
+Key: MMX_MOVD:  [ 0.00  0.00 ]
+Key: MMX_MOVDQ:  [ 0.00  0.00 ]
+Key: MMX_MOVFR:  [ 0.00  0.00 ]
+Key: MMX_MOVNTQmr:  [ 0.00  0.00 ]
+Key: MMX_MOVQ:  [ 0.00  0.00 ]
+Key: MMX_PABSBrm:  [ 0.00  0.00 ]
+Key: MMX_PABSBrr:  [ 0.00  0.00 ]
+Key: MMX_PABSDrm:  [ 0.00  0.00 ]
+Key: MMX_PABSDrr:  [ 0.00  0.00 ]
+Key: MMX_PABSWrm:  [ 0.00  0.00 ]
+Key: MMX_PABSWrr:  [ 0.00  0.00 ]
+Key: MMX_PACKSSDWrm:  [ 0.00  0.00 ]
+Key: MMX_PACKSSDWrr:  [ 0.00  0.00 ]
+Key: MMX_PACKSSWBrm:  [ 0.00  0.00 ]
+Key: MMX_PACKSSWBrr:  [ 0.00  0.00 ]
+Key: MMX_PACKUSWBrm:  [ 0.00  0.00 ]
+Key: MMX_PACKUSWBrr:  [ 0.00  0.00 ]
+Key: MMX_PADDBrm:  [ 0.00  0.00 ]
+Key: MMX_PADDBrr:  [ 0.00  0.00 ]
+Key: MMX_PADDDrm:  [ 0.00  0.00 ]
+Key: MMX_PADDDrr:  [ 0.00  0.00 ]
+Key: MMX_PADDQrm:  [ 0.00  0.00 ]
+Key: MMX_PADDQrr:  [ 0.00  0.00 ]
+Key: MMX_PADDSBrm:  [ 0.00  0.00 ]
+Key: MMX_PADDSBrr:  [ 0.00  0.00 ]
+Key: MMX_PADDSWrm:  [ 0.00  0.00 ]
+Key: MMX_PADDSWrr:  [ 0.00  0.00 ]
+Key: MMX_PADDUSBrm:  [ 0.00  0.00 ]
+Key: MMX_PADDUSBrr:  [ 0.00  0.00 ]
+Key: MMX_PADDUSWrm:  [ 0.00  0.00 ]
+Key: MMX_PADDUSWrr:  [ 0.00  0.00 ]
+Key: MMX_PADDWrm:  [ 0.00  0.00 ]
+Key: MMX_PADDWrr:  [ 0.00  0.00 ]
+Key: MMX_PALIGNRrmi:  [ 0.00  0.00 ]
+Key: MMX_PALIGNRrri:  [ 0.00  0.00 ]
+Key: MMX_PANDNrm:  [ 0.00  0.00 ]
+Key: MMX_PANDNrr:  [ 0.00  0.00 ]
+Key: MMX_PANDrm:  [ 0.00  0.00 ]
+Key: MMX_PANDrr:  [ 0.00  0.00 ]
+Key: MMX_PAVGBrm:  [ 0.00  0.00 ]
+Key: MMX_PAVGBrr:  [ 0.00  0.00 ]
+Key: MMX_PAVGWrm:  [ 0.00  0.00 ]
+Key: MMX_PAVGWrr:  [ 0.00  0.00 ]
+Key: MMX_PCMPEQBrm:  [ 0.00  0.00 ]
+Key: MMX_PCMPEQBrr:  [ 0.00  0.00 ]
+Key: MMX_PCMPEQDrm:  [ 0.00  0.00 ]
+Key: MMX_PCMPEQDrr:  [ 0.00  0.00 ]
+Key: MMX_PCMPEQWrm:  [ 0.00  0.00 ]
+Key: MMX_PCMPEQWrr:  [ 0.00  0.00 ]
+Key: MMX_PCMPGTBrm:  [ 0.00  0.00 ]
+Key: MMX_PCMPGTBrr:  [ 0.00  0.00 ]
+Key: MMX_PCMPGTDrm:  [ 0.00  0.00 ]
+Key: MMX_PCMPGTDrr:  [ 0.00  0.00 ]
+Key: MMX_PCMPGTWrm:  [ 0.00  0.00 ]
+Key: MMX_PCMPGTWrr:  [ 0.00  0.00 ]
+Key: MMX_PEXTRWrri:  [ 0.00  0.00 ]
+Key: MMX_PHADDDrm:  [ 0.00  0.00 ]
+Key: MMX_PHADDDrr:  [ 0.00  0.00 ]
+Key: MMX_PHADDSWrm:  [ 0.00  0.00 ]
+Key: MMX_PHADDSWrr:  [ 0.00  0.00 ]
+Key: MMX_PHADDWrm:  [ 0.00  0.00 ]
+Key: MMX_PHADDWrr:  [ 0.00  0.00 ]
+Key: MMX_PHSUBDrm:  [ 0.00  0.00 ]
+Key: MMX_PHSUBDrr:  [ 0.00  0.00 ]
+Key: MMX_PHSUBSWrm:  [ 0.00  0.00 ]
+Key: MMX_PHSUBSWrr:  [ 0.00  0.00 ]
+Key: MMX_PHSUBWrm:  [ 0.00  0.00 ]
+Key: MMX_PHSUBWrr:  [ 0.00  0.00 ]
+Key: MMX_PINSRWrmi:  [ 0.00  0.00 ]
+Key: MMX_PINSRWrri:  [ 0.00  0.00 ]
+Key: MMX_PMADDUBSWrm:  [ 0.00  0.00 ]
+Key: MMX_PMADDUBSWrr:  [ 0.00  0.00 ]
+Key: MMX_PMADDWDrm:  [ 0.00  0.00 ]
+Key: MMX_PMADDWDrr:  [ 0.00  0.00 ]
+Key: MMX_PMAXSWrm:  [ 0.00  0.00 ]
+Key: MMX_PMAXSWrr:  [ 0.00  0.00 ]
+Key: MMX_PMAXUBrm:  [ 0.00  0.00 ]
+Key: MMX_PMAXUBrr:  [ 0.00  0.00 ]
+Key: MMX_PMINSWrm:  [ 0.00  0.00 ]
+Key: MMX_PMINSWrr:  [ 0.00  0.00 ]
+Key: MMX_PMINUBrm:  [ 0.00  0.00 ]
+Key: MMX_PMINUBrr:  [ 0.00  0.00 ]
+Key: MMX_PMOVMSKBrr:  [ 0.00  0.00 ]
+Key: MMX_PMULHRSWrm:  [ 0.00  0.00 ]
+Key: MMX_PMULHRSWrr:  [ 0.00  0.00 ]
+Key: MMX_PMULHUWrm:  [ 0.00  0.00 ]
+Key: MMX_PMULHUWrr:  [ 0.00  0.00 ]
+Key: MMX_PMULHWrm:  [ 0.00  0.00 ]
+Key: MMX_PMULHWrr:  [ 0.00  0.00 ]
+Key: MMX_PMULLWrm:  [ 0.00  0.00 ]
+Key: MMX_PMULLWrr:  [ 0.00  0.00 ]
+Key: MMX_PMULUDQrm:  [ 0.00  0.00 ]
+Key: MMX_PMULUDQrr:  [ 0.00  0.00 ]
+Key: MMX_PORrm:  [ 0.00  0.00 ]
+Key: MMX_PORrr:  [ 0.00  0.00 ]
+Key: MMX_PSADBWrm:  [ 0.00  0.00 ]
+Key: MMX_PSADBWrr:  [ 0.00  0.00 ]
+Key: MMX_PSHUFBrm:  [ 0.00  0.00 ]
+Key: MMX_PSHUFBrr:  [ 0.00  0.00 ]
+Key: MMX_PSHUFWmi:  [ 0.00  0.00 ]
+Key: MMX_PSHUFWri:  [ 0.00  0.00 ]
+Key: MMX_PSIGNBrm:  [ 0.00  0.00 ]
+Key: MMX_PSIGNBrr:  [ 0.00  0.00 ]
+Key: MMX_PSIGNDrm:  [ 0.00  0.00 ]
+Key: MMX_PSIGNDrr:  [ 0.00  0.00 ]
+Key: MMX_PSIGNWrm:  [ 0.00  0.00 ]
+Key: MMX_PSIGNWrr:  [ 0.00  0.00 ]
+Key: MMX_PSLLDri:  [ 0.00  0.00 ]
+Key: MMX_PSLLDrm:  [ 0.00  0.00 ]
+Key: MMX_PSLLDrr:  [ 0.00  0.00 ]
+Key: MMX_PSLLQri:  [ 0.00  0.00 ]
+Key: MMX_PSLLQrm:  [ 0.00  0.00 ]
+Key: MMX_PSLLQrr:  [ 0.00  0.00 ]
+Key: MMX_PSLLWri:  [ 0.00  0.00 ]
+Key: MMX_PSLLWrm:  [ 0.00  0.00 ]
+Key: MMX_PSLLWrr:  [ 0.00  0.00 ]
+Key: MMX_PSRADri:  [ 0.00  0.00 ]
+Key: MMX_PSRADrm:  [ 0.00  0.00 ]
+Key: MMX_PSRADrr:  [ 0.00  0.00 ]
+Key: MMX_PSRAWri:  [ 0.00  0.00 ]
+Key: MMX_PSRAWrm:  [ 0.00  0.00 ]
+Key: MMX_PSRAWrr:  [ 0.00  0.00 ]
+Key: MMX_PSRLDri:  [ 0.00  0.00 ]
+Key: MMX_PSRLDrm:  [ 0.00  0.00 ]
+Key: MMX_PSRLDrr:  [ 0.00  0.00 ]
+Key: MMX_PSRLQri:  [ 0.00  0.00 ]
+Key: MMX_PSRLQrm:  [ 0.00  0.00 ]
+Key: MMX_PSRLQrr:  [ 0.00  0.00 ]
+Key: MMX_PSRLWri:  [ 0.00  0.00 ]
+Key: MMX_PSRLWrm:  [ 0.00  0.00 ]
+Key: MMX_PSRLWrr:  [ 0.00  0.00 ]
+Key: MMX_PSUBBrm:  [ 0.00  0.00 ]
+Key: MMX_PSUBBrr:  [ 0.00  0.00 ]
+Key: MMX_PSUBDrm:  [ 0.00  0.00 ]
+Key: MMX_PSUBDrr:  [ 0.00  0.00 ]
+Key: MMX_PSUBQrm:  [ 0.00  0.00 ]
+Key: MMX_PSUBQrr:  [ 0.00  0.00 ]
+Key: MMX_PSUBSBrm:  [ 0.00  0.00 ]
+Key: MMX_PSUBSBrr:  [ 0.00  0.00 ]
+Key: MMX_PSUBSWrm:  [ 0.00  0.00 ]
+Key: MMX_PSUBSWrr:  [ 0.00  0.00 ]
+Key: MMX_PSUBUSBrm:  [ 0.00  0.00 ]
+Key: MMX_PSUBUSBrr:  [ 0.00  0.00 ]
+Key: MMX_PSUBUSWrm:  [ 0.00  0.00 ]
+Key: MMX_PSUBUSWrr:  [ 0.00  0.00 ]
+Key: MMX_PSUBWrm:  [ 0.00  0.00 ]
+Key: MMX_PSUBWrr:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKHBWrm:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKHBWrr:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKHDQrm:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKHDQrr:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKHWDrm:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKHWDrr:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKLBWrm:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKLBWrr:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKLDQrm:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKLDQrr:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKLWDrm:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKLWDrr:  [ 0.00  0.00 ]
+Key: MMX_PXORrm:  [ 0.00  0.00 ]
+Key: MMX_PXORrr:  [ 0.00  0.00 ]
+Key: MMX_SET:  [ 0.00  0.00 ]
+Key: MONITOR:  [ 0.00  0.00 ]
+Key: MONITORX:  [ 0.00  0.00 ]
+Key: MONTMUL:  [ 0.00  0.00 ]
+Key: MORESTACK_RET:  [ 0.00  0.00 ]
+Key: MORESTACK_RET_RESTORE_R:  [ 0.00  0.00 ]
+Key: MOV:  [ 0.00  0.00 ]
+Key: MOVAPDmr:  [ 0.00  0.00 ]
+Key: MOVAPDrm:  [ 0.00  0.00 ]
+Key: MOVAPDrr:  [ 0.00  0.00 ]
+Key: MOVAPDrr_REV:  [ 0.00  0.00 ]
+Key: MOVAPSmr:  [ 0.00  0.00 ]
+Key: MOVAPSrm:  [ 0.00  0.00 ]
+Key: MOVAPSrr:  [ 0.00  0.00 ]
+Key: MOVAPSrr_REV:  [ 0.00  0.00 ]
+Key: MOVBE:  [ 0.00  0.00 ]
+Key: MOVDDUPrm:  [ 0.00  0.00 ]
+Key: MOVDDUPrr:  [ 0.00  0.00 ]
+Key: MOVDI:  [ 0.00  0.00 ]
+Key: MOVDIR:  [ 0.00  0.00 ]
+Key: MOVDIRI:  [ 0.00  0.00 ]
+Key: MOVDQAmr:  [ 0.00  0.00 ]
+Key: MOVDQArm:  [ 0.00  0.00 ]
+Key: MOVDQArr:  [ 0.00  0.00 ]
+Key: MOVDQArr_REV:  [ 0.00  0.00 ]
+Key: MOVDQUmr:  [ 0.00  0.00 ]
+Key: MOVDQUrm:  [ 0.00  0.00 ]
+Key: MOVDQUrr:  [ 0.00  0.00 ]
+Key: MOVDQUrr_REV:  [ 0.00  0.00 ]
+Key: MOVHLPSrr:  [ 0.00  0.00 ]
+Key: MOVHPDmr:  [ 0.00  0.00 ]
+Key: MOVHPDrm:  [ 0.00  0.00 ]
+Key: MOVHPSmr:  [ 0.00  0.00 ]
+Key: MOVHPSrm:  [ 0.00  0.00 ]
+Key: MOVLHPSrr:  [ 0.00  0.00 ]
+Key: MOVLPDmr:  [ 0.00  0.00 ]
+Key: MOVLPDrm:  [ 0.00  0.00 ]
+Key: MOVLPSmr:  [ 0.00  0.00 ]
+Key: MOVLPSrm:  [ 0.00  0.00 ]
+Key: MOVMSKPDrr:  [ 0.00  0.00 ]
+Key: MOVMSKPSrr:  [ 0.00  0.00 ]
+Key: MOVNTDQArm:  [ 0.00  0.00 ]
+Key: MOVNTDQmr:  [ 0.00  0.00 ]
+Key: MOVNTI:  [ 0.00  0.00 ]
+Key: MOVNTImr:  [ 0.00  0.00 ]
+Key: MOVNTPDmr:  [ 0.00  0.00 ]
+Key: MOVNTPSmr:  [ 0.00  0.00 ]
+Key: MOVNTSD:  [ 0.00  0.00 ]
+Key: MOVNTSS:  [ 0.00  0.00 ]
+Key: MOVPC:  [ 0.00  0.00 ]
+Key: MOVPDI:  [ 0.00  0.00 ]
+Key: MOVPQI:  [ 0.00  0.00 ]
+Key: MOVPQIto:  [ 0.00  0.00 ]
+Key: MOVQI:  [ 0.00  0.00 ]
+Key: MOVRS:  [ 0.00  0.00 ]
+Key: MOVSB:  [ 0.00  0.00 ]
+Key: MOVSDmr:  [ 0.00  0.00 ]
+Key: MOVSDrm:  [ 0.00  0.00 ]
+Key: MOVSDrm_alt:  [ 0.00  0.00 ]
+Key: MOVSDrr:  [ 0.00  0.00 ]
+Key: MOVSDrr_REV:  [ 0.00  0.00 ]
+Key: MOVSDto:  [ 0.00  0.00 ]
+Key: MOVSHDUPrm:  [ 0.00  0.00 ]
+Key: MOVSHDUPrr:  [ 0.00  0.00 ]
+Key: MOVSHPmr:  [ 0.00  0.00 ]
+Key: MOVSHPrm:  [ 0.00  0.00 ]
+Key: MOVSL:  [ 0.00  0.00 ]
+Key: MOVSLDUPrm:  [ 0.00  0.00 ]
+Key: MOVSLDUPrr:  [ 0.00  0.00 ]
+Key: MOVSQ:  [ 0.00  0.00 ]
+Key: MOVSS:  [ 0.00  0.00 ]
+Key: MOVSSmr:  [ 0.00  0.00 ]
+Key: MOVSSrm:  [ 0.00  0.00 ]
+Key: MOVSSrm_alt:  [ 0.00  0.00 ]
+Key: MOVSSrr:  [ 0.00  0.00 ]
+Key: MOVSSrr_REV:  [ 0.00  0.00 ]
+Key: MOVSW:  [ 0.00  0.00 ]
+Key: MOVSX:  [ 0.00  0.00 ]
+Key: MOVUPDmr:  [ 0.00  0.00 ]
+Key: MOVUPDrm:  [ 0.00  0.00 ]
+Key: MOVUPDrr:  [ 0.00  0.00 ]
+Key: MOVUPDrr_REV:  [ 0.00  0.00 ]
+Key: MOVUPSmr:  [ 0.00  0.00 ]
+Key: MOVUPSrm:  [ 0.00  0.00 ]
+Key: MOVUPSrr:  [ 0.00  0.00 ]
+Key: MOVUPSrr_REV:  [ 0.00  0.00 ]
+Key: MOVZPQILo:  [ 0.00  0.00 ]
+Key: MOVZX:  [ 0.00  0.00 ]
+Key: MPSADBWrmi:  [ 0.00  0.00 ]
+Key: MPSADBWrri:  [ 0.00  0.00 ]
+Key: MUL:  [ 0.00  0.00 ]
+Key: MULPDrm:  [ 0.00  0.00 ]
+Key: MULPDrr:  [ 0.00  0.00 ]
+Key: MULPSrm:  [ 0.00  0.00 ]
+Key: MULPSrr:  [ 0.00  0.00 ]
+Key: MULSDrm:  [ 0.00  0.00 ]
+Key: MULSDrm_Int:  [ 0.00  0.00 ]
+Key: MULSDrr:  [ 0.00  0.00 ]
+Key: MULSDrr_Int:  [ 0.00  0.00 ]
+Key: MULSSrm:  [ 0.00  0.00 ]
+Key: MULSSrm_Int:  [ 0.00  0.00 ]
+Key: MULSSrr:  [ 0.00  0.00 ]
+Key: MULSSrr_Int:  [ 0.00  0.00 ]
+Key: MULX:  [ 0.00  0.00 ]
+Key: MUL_F:  [ 0.00  0.00 ]
+Key: MUL_FI:  [ 0.00  0.00 ]
+Key: MUL_FPrST:  [ 0.00  0.00 ]
+Key: MUL_FST:  [ 0.00  0.00 ]
+Key: MUL_Fp:  [ 0.00  0.00 ]
+Key: MUL_FpI:  [ 0.00  0.00 ]
+Key: MUL_FrST:  [ 0.00  0.00 ]
+Key: MWAITX:  [ 0.00  0.00 ]
+Key: MWAITX_SAVE_RBX:  [ 0.00  0.00 ]
+Key: MWAITXrrr:  [ 0.00  0.00 ]
+Key: MWAITrr:  [ 0.00  0.00 ]
+Key: NEG:  [ 0.00  0.00 ]
+Key: NOOP:  [ 0.00  0.00 ]
+Key: NOOPL:  [ 0.00  0.00 ]
+Key: NOOPLr:  [ 0.00  0.00 ]
+Key: NOOPQ:  [ 0.00  0.00 ]
+Key: NOOPQr:  [ 0.00  0.00 ]
+Key: NOOPW:  [ 0.00  0.00 ]
+Key: NOOPWr:  [ 0.00  0.00 ]
+Key: NOT:  [ 0.00  0.00 ]
+Key: OR:  [ 0.00  0.00 ]
+Key: ORPDrm:  [ 0.00  0.00 ]
+Key: ORPDrr:  [ 0.00  0.00 ]
+Key: ORPSrm:  [ 0.00  0.00 ]
+Key: ORPSrr:  [ 0.00  0.00 ]
+Key: OUT:  [ 0.00  0.00 ]
+Key: OUTSB:  [ 0.00  0.00 ]
+Key: OUTSL:  [ 0.00  0.00 ]
+Key: OUTSW:  [ 0.00  0.00 ]
+Key: PABSBrm:  [ 0.00  0.00 ]
+Key: PABSBrr:  [ 0.00  0.00 ]
+Key: PABSDrm:  [ 0.00  0.00 ]
+Key: PABSDrr:  [ 0.00  0.00 ]
+Key: PABSWrm:  [ 0.00  0.00 ]
+Key: PABSWrr:  [ 0.00  0.00 ]
+Key: PACKSSDWrm:  [ 0.00  0.00 ]
+Key: PACKSSDWrr:  [ 0.00  0.00 ]
+Key: PACKSSWBrm:  [ 0.00  0.00 ]
+Key: PACKSSWBrr:  [ 0.00  0.00 ]
+Key: PACKUSDWrm:  [ 0.00  0.00 ]
+Key: PACKUSDWrr:  [ 0.00  0.00 ]
+Key: PACKUSWBrm:  [ 0.00  0.00 ]
+Key: PACKUSWBrr:  [ 0.00  0.00 ]
+Key: PADDBrm:  [ 0.00  0.00 ]
+Key: PADDBrr:  [ 0.00  0.00 ]
+Key: PADDDrm:  [ 0.00  0.00 ]
+Key: PADDDrr:  [ 0.00  0.00 ]
+Key: PADDQrm:  [ 0.00  0.00 ]
+Key: PADDQrr:  [ 0.00  0.00 ]
+Key: PADDSBrm:  [ 0.00  0.00 ]
+Key: PADDSBrr:  [ 0.00  0.00 ]
+Key: PADDSWrm:  [ 0.00  0.00 ]
+Key: PADDSWrr:  [ 0.00  0.00 ]
+Key: PADDUSBrm:  [ 0.00  0.00 ]
+Key: PADDUSBrr:  [ 0.00  0.00 ]
+Key: PADDUSWrm:  [ 0.00  0.00 ]
+Key: PADDUSWrr:  [ 0.00  0.00 ]
+Key: PADDWrm:  [ 0.00  0.00 ]
+Key: PADDWrr:  [ 0.00  0.00 ]
+Key: PALIGNRrmi:  [ 0.00  0.00 ]
+Key: PALIGNRrri:  [ 0.00  0.00 ]
+Key: PANDNrm:  [ 0.00  0.00 ]
+Key: PANDNrr:  [ 0.00  0.00 ]
+Key: PANDrm:  [ 0.00  0.00 ]
+Key: PANDrr:  [ 0.00  0.00 ]
+Key: PATCHABLE_EVENT_CALL:  [ 0.00  0.00 ]
+Key: PATCHABLE_FUNCTION_ENTER:  [ 0.00  0.00 ]
+Key: PATCHABLE_FUNCTION_EXIT:  [ 0.00  0.00 ]
+Key: PATCHABLE_OP:  [ 0.00  0.00 ]
+Key: PATCHABLE_RET:  [ 0.00  0.00 ]
+Key: PATCHABLE_TAIL_CALL:  [ 0.00  0.00 ]
+Key: PATCHABLE_TYPED_EVENT_CALL:  [ 0.00  0.00 ]
+Key: PATCHPOINT:  [ 0.00  0.00 ]
+Key: PAUSE:  [ 0.00  0.00 ]
+Key: PAVGBrm:  [ 0.00  0.00 ]
+Key: PAVGBrr:  [ 0.00  0.00 ]
+Key: PAVGUSBrm:  [ 0.00  0.00 ]
+Key: PAVGUSBrr:  [ 0.00  0.00 ]
+Key: PAVGWrm:  [ 0.00  0.00 ]
+Key: PAVGWrr:  [ 0.00  0.00 ]
+Key: PBLENDVBrm:  [ 0.00  0.00 ]
+Key: PBLENDVBrr:  [ 0.00  0.00 ]
+Key: PBLENDWrmi:  [ 0.00  0.00 ]
+Key: PBLENDWrri:  [ 0.00  0.00 ]
+Key: PBNDKB:  [ 0.00  0.00 ]
+Key: PCLMULQDQrmi:  [ 0.00  0.00 ]
+Key: PCLMULQDQrri:  [ 0.00  0.00 ]
+Key: PCMPEQBrm:  [ 0.00  0.00 ]
+Key: PCMPEQBrr:  [ 0.00  0.00 ]
+Key: PCMPEQDrm:  [ 0.00  0.00 ]
+Key: PCMPEQDrr:  [ 0.00  0.00 ]
+Key: PCMPEQQrm:  [ 0.00  0.00 ]
+Key: PCMPEQQrr:  [ 0.00  0.00 ]
+Key: PCMPEQWrm:  [ 0.00  0.00 ]
+Key: PCMPEQWrr:  [ 0.00  0.00 ]
+Key: PCMPESTRIrmi:  [ 0.00  0.00 ]
+Key: PCMPESTRIrri:  [ 0.00  0.00 ]
+Key: PCMPESTRMrmi:  [ 0.00  0.00 ]
+Key: PCMPESTRMrri:  [ 0.00  0.00 ]
+Key: PCMPGTBrm:  [ 0.00  0.00 ]
+Key: PCMPGTBrr:  [ 0.00  0.00 ]
+Key: PCMPGTDrm:  [ 0.00  0.00 ]
+Key: PCMPGTDrr:  [ 0.00  0.00 ]
+Key: PCMPGTQrm:  [ 0.00  0.00 ]
+Key: PCMPGTQrr:  [ 0.00  0.00 ]
+Key: PCMPGTWrm:  [ 0.00  0.00 ]
+Key: PCMPGTWrr:  [ 0.00  0.00 ]
+Key: PCMPISTRIrmi:  [ 0.00  0.00 ]
+Key: PCMPISTRIrri:  [ 0.00  0.00 ]
+Key: PCMPISTRMrmi:  [ 0.00  0.00 ]
+Key: PCMPISTRMrri:  [ 0.00  0.00 ]
+Key: PCONFIG:  [ 0.00  0.00 ]
+Key: PDEP:  [ 0.00  0.00 ]
+Key: PEXT:  [ 0.00  0.00 ]
+Key: PEXTRBmri:  [ 0.00  0.00 ]
+Key: PEXTRBrri:  [ 0.00  0.00 ]
+Key: PEXTRDmri:  [ 0.00  0.00 ]
+Key: PEXTRDrri:  [ 0.00  0.00 ]
+Key: PEXTRQmri:  [ 0.00  0.00 ]
+Key: PEXTRQrri:  [ 0.00  0.00 ]
+Key: PEXTRWmri:  [ 0.00  0.00 ]
+Key: PEXTRWrri:  [ 0.00  0.00 ]
+Key: PEXTRWrri_REV:  [ 0.00  0.00 ]
+Key: PF:  [ 0.00  0.00 ]
+Key: PFACCrm:  [ 0.00  0.00 ]
+Key: PFACCrr:  [ 0.00  0.00 ]
+Key: PFADDrm:  [ 0.00  0.00 ]
+Key: PFADDrr:  [ 0.00  0.00 ]
+Key: PFCMPEQrm:  [ 0.00  0.00 ]
+Key: PFCMPEQrr:  [ 0.00  0.00 ]
+Key: PFCMPGErm:  [ 0.00  0.00 ]
+Key: PFCMPGErr:  [ 0.00  0.00 ]
+Key: PFCMPGTrm:  [ 0.00  0.00 ]
+Key: PFCMPGTrr:  [ 0.00  0.00 ]
+Key: PFMAXrm:  [ 0.00  0.00 ]
+Key: PFMAXrr:  [ 0.00  0.00 ]
+Key: PFMINrm:  [ 0.00  0.00 ]
+Key: PFMINrr:  [ 0.00  0.00 ]
+Key: PFMULrm:  [ 0.00  0.00 ]
+Key: PFMULrr:  [ 0.00  0.00 ]
+Key: PFNACCrm:  [ 0.00  0.00 ]
+Key: PFNACCrr:  [ 0.00  0.00 ]
+Key: PFPNACCrm:  [ 0.00  0.00 ]
+Key: PFPNACCrr:  [ 0.00  0.00 ]
+Key: PFRCPIT:  [ 0.00  0.00 ]
+Key: PFRCPrm:  [ 0.00  0.00 ]
+Key: PFRCPrr:  [ 0.00  0.00 ]
+Key: PFRSQIT:  [ 0.00  0.00 ]
+Key: PFRSQRTrm:  [ 0.00  0.00 ]
+Key: PFRSQRTrr:  [ 0.00  0.00 ]
+Key: PFSUBRrm:  [ 0.00  0.00 ]
+Key: PFSUBRrr:  [ 0.00  0.00 ]
+Key: PFSUBrm:  [ 0.00  0.00 ]
+Key: PFSUBrr:  [ 0.00  0.00 ]
+Key: PHADDDrm:  [ 0.00  0.00 ]
+Key: PHADDDrr:  [ 0.00  0.00 ]
+Key: PHADDSWrm:  [ 0.00  0.00 ]
+Key: PHADDSWrr:  [ 0.00  0.00 ]
+Key: PHADDWrm:  [ 0.00  0.00 ]
+Key: PHADDWrr:  [ 0.00  0.00 ]
+Key: PHI:  [ 0.00  0.00 ]
+Key: PHMINPOSUWrm:  [ 0.00  0.00 ]
+Key: PHMINPOSUWrr:  [ 0.00  0.00 ]
+Key: PHSUBDrm:  [ 0.00  0.00 ]
+Key: PHSUBDrr:  [ 0.00  0.00 ]
+Key: PHSUBSWrm:  [ 0.00  0.00 ]
+Key: PHSUBSWrr:  [ 0.00  0.00 ]
+Key: PHSUBWrm:  [ 0.00  0.00 ]
+Key: PHSUBWrr:  [ 0.00  0.00 ]
+Key: PI:  [ 0.00  0.00 ]
+Key: PINSRBrmi:  [ 0.00  0.00 ]
+Key: PINSRBrri:  [ 0.00  0.00 ]
+Key: PINSRDrmi:  [ 0.00  0.00 ]
+Key: PINSRDrri:  [ 0.00  0.00 ]
+Key: PINSRQrmi:  [ 0.00  0.00 ]
+Key: PINSRQrri:  [ 0.00  0.00 ]
+Key: PINSRWrmi:  [ 0.00  0.00 ]
+Key: PINSRWrri:  [ 0.00  0.00 ]
+Key: PLDTILECFGV:  [ 0.00  0.00 ]
+Key: PLEA:  [ 0.00  0.00 ]
+Key: PMADDUBSWrm:  [ 0.00  0.00 ]
+Key: PMADDUBSWrr:  [ 0.00  0.00 ]
+Key: PMADDWDrm:  [ 0.00  0.00 ]
+Key: PMADDWDrr:  [ 0.00  0.00 ]
+Key: PMAXSBrm:  [ 0.00  0.00 ]
+Key: PMAXSBrr:  [ 0.00  0.00 ]
+Key: PMAXSDrm:  [ 0.00  0.00 ]
+Key: PMAXSDrr:  [ 0.00  0.00 ]
+Key: PMAXSWrm:  [ 0.00  0.00 ]
+Key: PMAXSWrr:  [ 0.00  0.00 ]
+Key: PMAXUBrm:  [ 0.00  0.00 ]
+Key: PMAXUBrr:  [ 0.00  0.00 ]
+Key: PMAXUDrm:  [ 0.00  0.00 ]
+Key: PMAXUDrr:  [ 0.00  0.00 ]
+Key: PMAXUWrm:  [ 0.00  0.00 ]
+Key: PMAXUWrr:  [ 0.00  0.00 ]
+Key: PMINSBrm:  [ 0.00  0.00 ]
+Key: PMINSBrr:  [ 0.00  0.00 ]
+Key: PMINSDrm:  [ 0.00  0.00 ]
+Key: PMINSDrr:  [ 0.00  0.00 ]
+Key: PMINSWrm:  [ 0.00  0.00 ]
+Key: PMINSWrr:  [ 0.00  0.00 ]
+Key: PMINUBrm:  [ 0.00  0.00 ]
+Key: PMINUBrr:  [ 0.00  0.00 ]
+Key: PMINUDrm:  [ 0.00  0.00 ]
+Key: PMINUDrr:  [ 0.00  0.00 ]
+Key: PMINUWrm:  [ 0.00  0.00 ]
+Key: PMINUWrr:  [ 0.00  0.00 ]
+Key: PMOVMSKBrr:  [ 0.00  0.00 ]
+Key: PMOVSXBDrm:  [ 0.00  0.00 ]
+Key: PMOVSXBDrr:  [ 0.00  0.00 ]
+Key: PMOVSXBQrm:  [ 0.00  0.00 ]
+Key: PMOVSXBQrr:  [ 0.00  0.00 ]
+Key: PMOVSXBWrm:  [ 0.00  0.00 ]
+Key: PMOVSXBWrr:  [ 0.00  0.00 ]
+Key: PMOVSXDQrm:  [ 0.00  0.00 ]
+Key: PMOVSXDQrr:  [ 0.00  0.00 ]
+Key: PMOVSXWDrm:  [ 0.00  0.00 ]
+Key: PMOVSXWDrr:  [ 0.00  0.00 ]
+Key: PMOVSXWQrm:  [ 0.00  0.00 ]
+Key: PMOVSXWQrr:  [ 0.00  0.00 ]
+Key: PMOVZXBDrm:  [ 0.00  0.00 ]
+Key: PMOVZXBDrr:  [ 0.00  0.00 ]
+Key: PMOVZXBQrm:  [ 0.00  0.00 ]
+Key: PMOVZXBQrr:  [ 0.00  0.00 ]
+Key: PMOVZXBWrm:  [ 0.00  0.00 ]
+Key: PMOVZXBWrr:  [ 0.00  0.00 ]
+Key: PMOVZXDQrm:  [ 0.00  0.00 ]
+Key: PMOVZXDQrr:  [ 0.00  0.00 ]
+Key: PMOVZXWDrm:  [ 0.00  0.00 ]
+Key: PMOVZXWDrr:  [ 0.00  0.00 ]
+Key: PMOVZXWQrm:  [ 0.00  0.00 ]
+Key: PMOVZXWQrr:  [ 0.00  0.00 ]
+Key: PMULDQrm:  [ 0.00  0.00 ]
+Key: PMULDQrr:  [ 0.00  0.00 ]
+Key: PMULHRSWrm:  [ 0.00  0.00 ]
+Key: PMULHRSWrr:  [ 0.00  0.00 ]
+Key: PMULHRWrm:  [ 0.00  0.00 ]
+Key: PMULHRWrr:  [ 0.00  0.00 ]
+Key: PMULHUWrm:  [ 0.00  0.00 ]
+Key: PMULHUWrr:  [ 0.00  0.00 ]
+Key: PMULHWrm:  [ 0.00  0.00 ]
+Key: PMULHWrr:  [ 0.00  0.00 ]
+Key: PMULLDrm:  [ 0.00  0.00 ]
+Key: PMULLDrr:  [ 0.00  0.00 ]
+Key: PMULLWrm:  [ 0.00  0.00 ]
+Key: PMULLWrr:  [ 0.00  0.00 ]
+Key: PMULUDQrm:  [ 0.00  0.00 ]
+Key: PMULUDQrr:  [ 0.00  0.00 ]
+Key: POP:  [ 0.00  0.00 ]
+Key: POPA:  [ 0.00  0.00 ]
+Key: POPCNT:  [ 0.00  0.00 ]
+Key: POPDS:  [ 0.00  0.00 ]
+Key: POPES:  [ 0.00  0.00 ]
+Key: POPF:  [ 0.00  0.00 ]
+Key: POPFS:  [ 0.00  0.00 ]
+Key: POPGS:  [ 0.00  0.00 ]
+Key: POPP:  [ 0.00  0.00 ]
+Key: POPSS:  [ 0.00  0.00 ]
+Key: PORrm:  [ 0.00  0.00 ]
+Key: PORrr:  [ 0.00  0.00 ]
+Key: PREALLOCATED_ARG:  [ 0.00  0.00 ]
+Key: PREALLOCATED_SETUP:  [ 0.00  0.00 ]
+Key: PREFETCH:  [ 0.00  0.00 ]
+Key: PREFETCHIT:  [ 0.00  0.00 ]
+Key: PREFETCHNTA:  [ 0.00  0.00 ]
+Key: PREFETCHRST:  [ 0.00  0.00 ]
+Key: PREFETCHT:  [ 0.00  0.00 ]
+Key: PREFETCHW:  [ 0.00  0.00 ]
+Key: PREFETCHWT:  [ 0.00  0.00 ]
+Key: PROBED_ALLOCA:  [ 0.00  0.00 ]
+Key: PSADBWrm:  [ 0.00  0.00 ]
+Key: PSADBWrr:  [ 0.00  0.00 ]
+Key: PSEUDO_PROBE:  [ 0.00  0.00 ]
+Key: PSHUFBrm:  [ 0.00  0.00 ]
+Key: PSHUFBrr:  [ 0.00  0.00 ]
+Key: PSHUFDmi:  [ 0.00  0.00 ]
+Key: PSHUFDri:  [ 0.00  0.00 ]
+Key: PSHUFHWmi:  [ 0.00  0.00 ]
+Key: PSHUFHWri:  [ 0.00  0.00 ]
+Key: PSHUFLWmi:  [ 0.00  0.00 ]
+Key: PSHUFLWri:  [ 0.00  0.00 ]
+Key: PSIGNBrm:  [ 0.00  0.00 ]
+Key: PSIGNBrr:  [ 0.00  0.00 ]
+Key: PSIGNDrm:  [ 0.00  0.00 ]
+Key: PSIGNDrr:  [ 0.00  0.00 ]
+Key: PSIGNWrm:  [ 0.00  0.00 ]
+Key: PSIGNWrr:  [ 0.00  0.00 ]
+Key: PSLLDQri:  [ 0.00  0.00 ]
+Key: PSLLDri:  [ 0.00  0.00 ]
+Key: PSLLDrm:  [ 0.00  0.00 ]
+Key: PSLLDrr:  [ 0.00  0.00 ]
+Key: PSLLQri:  [ 0.00  0.00 ]
+Key: PSLLQrm:  [ 0.00  0.00 ]
+Key: PSLLQrr:  [ 0.00  0.00 ]
+Key: PSLLWri:  [ 0.00  0.00 ]
+Key: PSLLWrm:  [ 0.00  0.00 ]
+Key: PSLLWrr:  [ 0.00  0.00 ]
+Key: PSMASH:  [ 0.00  0.00 ]
+Key: PSRADri:  [ 0.00  0.00 ]
+Key: PSRADrm:  [ 0.00  0.00 ]
+Key: PSRADrr:  [ 0.00  0.00 ]
+Key: PSRAWri:  [ 0.00  0.00 ]
+Key: PSRAWrm:  [ 0.00  0.00 ]
+Key: PSRAWrr:  [ 0.00  0.00 ]
+Key: PSRLDQri:  [ 0.00  0.00 ]
+Key: PSRLDri:  [ 0.00  0.00 ]
+Key: PSRLDrm:  [ 0.00  0.00 ]
+Key: PSRLDrr:  [ 0.00  0.00 ]
+Key: PSRLQri:  [ 0.00  0.00 ]
+Key: PSRLQrm:  [ 0.00  0.00 ]
+Key: PSRLQrr:  [ 0.00  0.00 ]
+Key: PSRLWri:  [ 0.00  0.00 ]
+Key: PSRLWrm:  [ 0.00  0.00 ]
+Key: PSRLWrr:  [ 0.00  0.00 ]
+Key: PSUBBrm:  [ 0.00  0.00 ]
+Key: PSUBBrr:  [ 0.00  0.00 ]
+Key: PSUBDrm:  [ 0.00  0.00 ]
+Key: PSUBDrr:  [ 0.00  0.00 ]
+Key: PSUBQrm:  [ 0.00  0.00 ]
+Key: PSUBQrr:  [ 0.00  0.00 ]
+Key: PSUBSBrm:  [ 0.00  0.00 ]
+Key: PSUBSBrr:  [ 0.00  0.00 ]
+Key: PSUBSWrm:  [ 0.00  0.00 ]
+Key: PSUBSWrr:  [ 0.00  0.00 ]
+Key: PSUBUSBrm:  [ 0.00  0.00 ]
+Key: PSUBUSBrr:  [ 0.00  0.00 ]
+Key: PSUBUSWrm:  [ 0.00  0.00 ]
+Key: PSUBUSWrr:  [ 0.00  0.00 ]
+Key: PSUBWrm:  [ 0.00  0.00 ]
+Key: PSUBWrr:  [ 0.00  0.00 ]
+Key: PSWAPDrm:  [ 0.00  0.00 ]
+Key: PSWAPDrr:  [ 0.00  0.00 ]
+Key: PT:  [ 0.00  0.00 ]
+Key: PTCMMIMFP:  [ 0.00  0.00 ]
+Key: PTCMMRLFP:  [ 0.00  0.00 ]
+Key: PTCONJTCMMIMFP:  [ 0.00  0.00 ]
+Key: PTCONJTFP:  [ 0.00  0.00 ]
+Key: PTCVTROWD:  [ 0.00  0.00 ]
+Key: PTCVTROWPS:  [ 0.00  0.00 ]
+Key: PTDPBF:  [ 0.00  0.00 ]
+Key: PTDPBHF:  [ 0.00  0.00 ]
+Key: PTDPBSSD:  [ 0.00  0.00 ]
+Key: PTDPBSSDV:  [ 0.00  0.00 ]
+Key: PTDPBSUD:  [ 0.00  0.00 ]
+Key: PTDPBSUDV:  [ 0.00  0.00 ]
+Key: PTDPBUSD:  [ 0.00  0.00 ]
+Key: PTDPBUSDV:  [ 0.00  0.00 ]
+Key: PTDPBUUD:  [ 0.00  0.00 ]
+Key: PTDPBUUDV:  [ 0.00  0.00 ]
+Key: PTDPFP:  [ 0.00  0.00 ]
+Key: PTDPHBF:  [ 0.00  0.00 ]
+Key: PTDPHF:  [ 0.00  0.00 ]
+Key: PTESTrm:  [ 0.00  0.00 ]
+Key: PTESTrr:  [ 0.00  0.00 ]
+Key: PTILELOADD:  [ 0.00  0.00 ]
+Key: PTILELOADDRS:  [ 0.00  0.00 ]
+Key: PTILELOADDRST:  [ 0.00  0.00 ]
+Key: PTILELOADDRSV:  [ 0.00  0.00 ]
+Key: PTILELOADDT:  [ 0.00  0.00 ]
+Key: PTILELOADDV:  [ 0.00  0.00 ]
+Key: PTILEMOVROWrre:  [ 0.00  0.00 ]
+Key: PTILEMOVROWrreV:  [ 0.00  0.00 ]
+Key: PTILEMOVROWrri:  [ 0.00  0.00 ]
+Key: PTILEMOVROWrriV:  [ 0.00  0.00 ]
+Key: PTILEPAIRLOAD:  [ 0.00  0.00 ]
+Key: PTILEPAIRSTORE:  [ 0.00  0.00 ]
+Key: PTILESTORED:  [ 0.00  0.00 ]
+Key: PTILESTOREDV:  [ 0.00  0.00 ]
+Key: PTILEZERO:  [ 0.00  0.00 ]
+Key: PTILEZEROV:  [ 0.00  0.00 ]
+Key: PTMMULTF:  [ 0.00  0.00 ]
+Key: PTTCMMIMFP:  [ 0.00  0.00 ]
+Key: PTTCMMRLFP:  [ 0.00  0.00 ]
+Key: PTTDPBF:  [ 0.00  0.00 ]
+Key: PTTDPFP:  [ 0.00  0.00 ]
+Key: PTTMMULTF:  [ 0.00  0.00 ]
+Key: PTTRANSPOSED:  [ 0.00  0.00 ]
+Key: PTTRANSPOSEDV:  [ 0.00  0.00 ]
+Key: PTWRITE:  [ 0.00  0.00 ]
+Key: PTWRITEm:  [ 0.00  0.00 ]
+Key: PTWRITEr:  [ 0.00  0.00 ]
+Key: PUNPCKHBWrm:  [ 0.00  0.00 ]
+Key: PUNPCKHBWrr:  [ 0.00  0.00 ]
+Key: PUNPCKHDQrm:  [ 0.00  0.00 ]
+Key: PUNPCKHDQrr:  [ 0.00  0.00 ]
+Key: PUNPCKHQDQrm:  [ 0.00  0.00 ]
+Key: PUNPCKHQDQrr:  [ 0.00  0.00 ]
+Key: PUNPCKHWDrm:  [ 0.00  0.00 ]
+Key: PUNPCKHWDrr:  [ 0.00  0.00 ]
+Key: PUNPCKLBWrm:  [ 0.00  0.00 ]
+Key: PUNPCKLBWrr:  [ 0.00  0.00 ]
+Key: PUNPCKLDQrm:  [ 0.00  0.00 ]
+Key: PUNPCKLDQrr:  [ 0.00  0.00 ]
+Key: PUNPCKLQDQrm:  [ 0.00  0.00 ]
+Key: PUNPCKLQDQrr:  [ 0.00  0.00 ]
+Key: PUNPCKLWDrm:  [ 0.00  0.00 ]
+Key: PUNPCKLWDrr:  [ 0.00  0.00 ]
+Key: PUSH:  [ 0.00  0.00 ]
+Key: PUSHA:  [ 0.00  0.00 ]
+Key: PUSHCS:  [ 0.00  0.00 ]
+Key: PUSHDS:  [ 0.00  0.00 ]
+Key: PUSHES:  [ 0.00  0.00 ]
+Key: PUSHF:  [ 0.00  0.00 ]
+Key: PUSHFS:  [ 0.00  0.00 ]
+Key: PUSHGS:  [ 0.00  0.00 ]
+Key: PUSHP:  [ 0.00  0.00 ]
+Key: PUSHSS:  [ 0.00  0.00 ]
+Key: PVALIDATE:  [ 0.00  0.00 ]
+Key: PXORrm:  [ 0.00  0.00 ]
+Key: PXORrr:  [ 0.00  0.00 ]
+Key: RCL:  [ 0.00  0.00 ]
+Key: RCPPSm:  [ 0.00  0.00 ]
+Key: RCPPSr:  [ 0.00  0.00 ]
+Key: RCPSSm:  [ 0.00  0.00 ]
+Key: RCPSSm_Int:  [ 0.00  0.00 ]
+Key: RCPSSr:  [ 0.00  0.00 ]
+Key: RCPSSr_Int:  [ 0.00  0.00 ]
+Key: RCR:  [ 0.00  0.00 ]
+Key: RDFLAGS:  [ 0.00  0.00 ]
+Key: RDFSBASE:  [ 0.00  0.00 ]
+Key: RDGSBASE:  [ 0.00  0.00 ]
+Key: RDMSR:  [ 0.00  0.00 ]
+Key: RDMSRLIST:  [ 0.00  0.00 ]
+Key: RDMSRri:  [ 0.00  0.00 ]
+Key: RDMSRri_EVEX:  [ 0.00  0.00 ]
+Key: RDPID:  [ 0.00  0.00 ]
+Key: RDPKRUr:  [ 0.00  0.00 ]
+Key: RDPMC:  [ 0.00  0.00 ]
+Key: RDPRU:  [ 0.00  0.00 ]
+Key: RDRAND:  [ 0.00  0.00 ]
+Key: RDSEED:  [ 0.00  0.00 ]
+Key: RDSSPD:  [ 0.00  0.00 ]
+Key: RDSSPQ:  [ 0.00  0.00 ]
+Key: RDTSC:  [ 0.00  0.00 ]
+Key: RDTSCP:  [ 0.00  0.00 ]
+Key: REG_SEQUENCE:  [ 0.00  0.00 ]
+Key: REPNE_PREFIX:  [ 0.00  0.00 ]
+Key: REP_MOVSB:  [ 0.00  0.00 ]
+Key: REP_MOVSD:  [ 0.00  0.00 ]
+Key: REP_MOVSQ:  [ 0.00  0.00 ]
+Key: REP_MOVSW:  [ 0.00  0.00 ]
+Key: REP_PREFIX:  [ 0.00  0.00 ]
+Key: REP_STOSB:  [ 0.00  0.00 ]
+Key: REP_STOSD:  [ 0.00  0.00 ]
+Key: REP_STOSQ:  [ 0.00  0.00 ]
+Key: REP_STOSW:  [ 0.00  0.00 ]
+Key: RET:  [ 0.00  0.00 ]
+Key: RETI:  [ 0.00  0.00 ]
+Key: REX:  [ 0.00  0.00 ]
+Key: RMPADJUST:  [ 0.00  0.00 ]
+Key: RMPQUERY:  [ 0.00  0.00 ]
+Key: RMPUPDATE:  [ 0.00  0.00 ]
+Key: ROL:  [ 0.00  0.00 ]
+Key: ROR:  [ 0.00  0.00 ]
+Key: RORX:  [ 0.00  0.00 ]
+Key: ROUNDPDmi:  [ 0.00  0.00 ]
+Key: ROUNDPDri:  [ 0.00  0.00 ]
+Key: ROUNDPSmi:  [ 0.00  0.00 ]
+Key: ROUNDPSri:  [ 0.00  0.00 ]
+Key: ROUNDSDmi:  [ 0.00  0.00 ]
+Key: ROUNDSDmi_Int:  [ 0.00  0.00 ]
+Key: ROUNDSDri:  [ 0.00  0.00 ]
+Key: ROUNDSDri_Int:  [ 0.00  0.00 ]
+Key: ROUNDSSmi:  [ 0.00  0.00 ]
+Key: ROUNDSSmi_Int:  [ 0.00  0.00 ]
+Key: ROUNDSSri:  [ 0.00  0.00 ]
+Key: ROUNDSSri_Int:  [ 0.00  0.00 ]
+Key: RSM:  [ 0.00  0.00 ]
+Key: RSQRTPSm:  [ 0.00  0.00 ]
+Key: RSQRTPSr:  [ 0.00  0.00 ]
+Key: RSQRTSSm:  [ 0.00  0.00 ]
+Key: RSQRTSSm_Int:  [ 0.00  0.00 ]
+Key: RSQRTSSr:  [ 0.00  0.00 ]
+Key: RSQRTSSr_Int:  [ 0.00  0.00 ]
+Key: RSTORSSP:  [ 0.00  0.00 ]
+Key: SAHF:  [ 0.00  0.00 ]
+Key: SALC:  [ 0.00  0.00 ]
+Key: SAR:  [ 0.00  0.00 ]
+Key: SARX:  [ 0.00  0.00 ]
+Key: SAVEPREVSSP:  [ 0.00  0.00 ]
+Key: SBB:  [ 0.00  0.00 ]
+Key: SCASB:  [ 0.00  0.00 ]
+Key: SCASL:  [ 0.00  0.00 ]
+Key: SCASQ:  [ 0.00  0.00 ]
+Key: SCASW:  [ 0.00  0.00 ]
+Key: SEAMCALL:  [ 0.00  0.00 ]
+Key: SEAMOPS:  [ 0.00  0.00 ]
+Key: SEAMRET:  [ 0.00  0.00 ]
+Key: SEG_ALLOCA:  [ 0.00  0.00 ]
+Key: SEH_BeginEpilogue:  [ 0.00  0.00 ]
+Key: SEH_EndEpilogue:  [ 0.00  0.00 ]
+Key: SEH_EndPrologue:  [ 0.00  0.00 ]
+Key: SEH_PushFrame:  [ 0.00  0.00 ]
+Key: SEH_PushReg:  [ 0.00  0.00 ]
+Key: SEH_SaveReg:  [ 0.00  0.00 ]
+Key: SEH_SaveXMM:  [ 0.00  0.00 ]
+Key: SEH_SetFrame:  [ 0.00  0.00 ]
+Key: SEH_StackAlign:  [ 0.00  0.00 ]
+Key: SEH_StackAlloc:  [ 0.00  0.00 ]
+Key: SEH_UnwindV:  [ 0.00  0.00 ]
+Key: SEH_UnwindVersion:  [ 0.00  0.00 ]
+Key: SENDUIPI:  [ 0.00  0.00 ]
+Key: SERIALIZE:  [ 0.00  0.00 ]
+Key: SETB_C:  [ 0.00  0.00 ]
+Key: SETCCm:  [ 0.00  0.00 ]
+Key: SETCCm_EVEX:  [ 0.00  0.00 ]
+Key: SETCCr:  [ 0.00  0.00 ]
+Key: SETCCr_EVEX:  [ 0.00  0.00 ]
+Key: SETSSBSY:  [ 0.00  0.00 ]
+Key: SETZUCCm:  [ 0.00  0.00 ]
+Key: SETZUCCr:  [ 0.00  0.00 ]
+Key: SFENCE:  [ 0.00  0.00 ]
+Key: SGDT:  [ 0.00  0.00 ]
+Key: SHA:  [ 0.00  0.00 ]
+Key: SHL:  [ 0.00  0.00 ]
+Key: SHLD:  [ 0.00  0.00 ]
+Key: SHLDROT:  [ 0.00  0.00 ]
+Key: SHLX:  [ 0.00  0.00 ]
+Key: SHR:  [ 0.00  0.00 ]
+Key: SHRD:  [ 0.00  0.00 ]
+Key: SHRDROT:  [ 0.00  0.00 ]
+Key: SHRX:  [ 0.00  0.00 ]
+Key: SHUFPDrmi:  [ 0.00  0.00 ]
+Key: SHUFPDrri:  [ 0.00  0.00 ]
+Key: SHUFPSrmi:  [ 0.00  0.00 ]
+Key: SHUFPSrri:  [ 0.00  0.00 ]
+Key: SIDT:  [ 0.00  0.00 ]
+Key: SKINIT:  [ 0.00  0.00 ]
+Key: SLDT:  [ 0.00  0.00 ]
+Key: SLWPCB:  [ 0.00  0.00 ]
+Key: SMSW:  [ 0.00  0.00 ]
+Key: SQRTPDm:  [ 0.00  0.00 ]
+Key: SQRTPDr:  [ 0.00  0.00 ]
+Key: SQRTPSm:  [ 0.00  0.00 ]
+Key: SQRTPSr:  [ 0.00  0.00 ]
+Key: SQRTSDm:  [ 0.00  0.00 ]
+Key: SQRTSDm_Int:  [ 0.00  0.00 ]
+Key: SQRTSDr:  [ 0.00  0.00 ]
+Key: SQRTSDr_Int:  [ 0.00  0.00 ]
+Key: SQRTSSm:  [ 0.00  0.00 ]
+Key: SQRTSSm_Int:  [ 0.00  0.00 ]
+Key: SQRTSSr:  [ 0.00  0.00 ]
+Key: SQRTSSr_Int:  [ 0.00  0.00 ]
+Key: SQRT_F:  [ 0.00  0.00 ]
+Key: SQRT_Fp:  [ 0.00  0.00 ]
+Key: SS_PREFIX:  [ 0.00  0.00 ]
+Key: STAC:  [ 0.00  0.00 ]
+Key: STACKALLOC_W_PROBING:  [ 0.00  0.00 ]
+Key: STACKMAP:  [ 0.00  0.00 ]
+Key: STATEPOINT:  [ 0.00  0.00 ]
+Key: STC:  [ 0.00  0.00 ]
+Key: STD:  [ 0.00  0.00 ]
+Key: STGI:  [ 0.00  0.00 ]
+Key: STI:  [ 0.00  0.00 ]
+Key: STMXCSR:  [ 0.00  0.00 ]
+Key: STOSB:  [ 0.00  0.00 ]
+Key: STOSL:  [ 0.00  0.00 ]
+Key: STOSQ:  [ 0.00  0.00 ]
+Key: STOSW:  [ 0.00  0.00 ]
+Key: STR:  [ 0.00  0.00 ]
+Key: STRm:  [ 0.00  0.00 ]
+Key: STTILECFG:  [ 0.00  0.00 ]
+Key: STTILECFG_EVEX:  [ 0.00  0.00 ]
+Key: STUI:  [ 0.00  0.00 ]
+Key: ST_F:  [ 0.00  0.00 ]
+Key: ST_FP:  [ 0.00  0.00 ]
+Key: ST_FPrr:  [ 0.00  0.00 ]
+Key: ST_Fp:  [ 0.00  0.00 ]
+Key: ST_FpP:  [ 0.00  0.00 ]
+Key: ST_Frr:  [ 0.00  0.00 ]
+Key: SUB:  [ 0.00  0.00 ]
+Key: SUBPDrm:  [ 0.00  0.00 ]
+Key: SUBPDrr:  [ 0.00  0.00 ]
+Key: SUBPSrm:  [ 0.00  0.00 ]
+Key: SUBPSrr:  [ 0.00  0.00 ]
+Key: SUBREG_TO_REG:  [ 0.00  0.00 ]
+Key: SUBR_F:  [ 0.00  0.00 ]
+Key: SUBR_FI:  [ 0.00  0.00 ]
+Key: SUBR_FPrST:  [ 0.00  0.00 ]
+Key: SUBR_FST:  [ 0.00  0.00 ]
+Key: SUBR_Fp:  [ 0.00  0.00 ]
+Key: SUBR_FpI:  [ 0.00  0.00 ]
+Key: SUBR_FrST:  [ 0.00  0.00 ]
+Key: SUBSDrm:  [ 0.00  0.00 ]
+Key: SUBSDrm_Int:  [ 0.00  0.00 ]
+Key: SUBSDrr:  [ 0.00  0.00 ]
+Key: SUBSDrr_Int:  [ 0.00  0.00 ]
+Key: SUBSSrm:  [ 0.00  0.00 ]
+Key: SUBSSrm_Int:  [ 0.00  0.00 ]
+Key: SUBSSrr:  [ 0.00  0.00 ]
+Key: SUBSSrr_Int:  [ 0.00  0.00 ]
+Key: SUB_F:  [ 0.00  0.00 ]
+Key: SUB_FI:  [ 0.00  0.00 ]
+Key: SUB_FPrST:  [ 0.00  0.00 ]
+Key: SUB_FST:  [ 0.00  0.00 ]
+Key: SUB_Fp:  [ 0.00  0.00 ]
+Key: SUB_FpI:  [ 0.00  0.00 ]
+Key: SUB_FrST:  [ 0.00  0.00 ]
+Key: SWAPGS:  [ 0.00  0.00 ]
+Key: SYSCALL:  [ 0.00  0.00 ]
+Key: SYSENTER:  [ 0.00  0.00 ]
+Key: SYSEXIT:  [ 0.00  0.00 ]
+Key: SYSRET:  [ 0.00  0.00 ]
+Key: T:  [ 0.00  0.00 ]
+Key: TAILJMPd:  [ 0.00  0.00 ]
+Key: TAILJMPd_CC:  [ 0.00  0.00 ]
+Key: TAILJMPm:  [ 0.00  0.00 ]
+Key: TAILJMPr:  [ 0.00  0.00 ]
+Key: TCMMIMFP:  [ 0.00  0.00 ]
+Key: TCMMRLFP:  [ 0.00  0.00 ]
+Key: TCONJTCMMIMFP:  [ 0.00  0.00 ]
+Key: TCONJTFP:  [ 0.00  0.00 ]
+Key: TCRETURN_HIPE:  [ 0.00  0.00 ]
+Key: TCRETURN_WIN:  [ 0.00  0.00 ]
+Key: TCRETURN_WINmi:  [ 0.00  0.00 ]
+Key: TCRETURNdi:  [ 0.00  0.00 ]
+Key: TCRETURNdicc:  [ 0.00  0.00 ]
+Key: TCRETURNmi:  [ 0.00  0.00 ]
+Key: TCRETURNri:  [ 0.00  0.00 ]
+Key: TCVTROWD:  [ 0.00  0.00 ]
+Key: TCVTROWPS:  [ 0.00  0.00 ]
+Key: TDCALL:  [ 0.00  0.00 ]
+Key: TDPBF:  [ 0.00  0.00 ]
+Key: TDPBHF:  [ 0.00  0.00 ]
+Key: TDPBSSD:  [ 0.00  0.00 ]
+Key: TDPBSUD:  [ 0.00  0.00 ]
+Key: TDPBUSD:  [ 0.00  0.00 ]
+Key: TDPBUUD:  [ 0.00  0.00 ]
+Key: TDPFP:  [ 0.00  0.00 ]
+Key: TDPHBF:  [ 0.00  0.00 ]
+Key: TDPHF:  [ 0.00  0.00 ]
+Key: TEST:  [ 0.00  0.00 ]
+Key: TESTUI:  [ 0.00  0.00 ]
+Key: TILELOADD:  [ 0.00  0.00 ]
+Key: TILELOADDRS:  [ 0.00  0.00 ]
+Key: TILELOADDRST:  [ 0.00  0.00 ]
+Key: TILELOADDRS_EVEX:  [ 0.00  0.00 ]
+Key: TILELOADDT:  [ 0.00  0.00 ]
+Key: TILELOADD_EVEX:  [ 0.00  0.00 ]
+Key: TILEMOVROWrre:  [ 0.00  0.00 ]
+Key: TILEMOVROWrri:  [ 0.00  0.00 ]
+Key: TILERELEASE:  [ 0.00  0.00 ]
+Key: TILESTORED:  [ 0.00  0.00 ]
+Key: TILESTORED_EVEX:  [ 0.00  0.00 ]
+Key: TILEZERO:  [ 0.00  0.00 ]
+Key: TLBSYNC:  [ 0.00  0.00 ]
+Key: TLSCall:  [ 0.00  0.00 ]
+Key: TLS_addr:  [ 0.00  0.00 ]
+Key: TLS_addrX:  [ 0.00  0.00 ]
+Key: TLS_base_addr:  [ 0.00  0.00 ]
+Key: TLS_base_addrX:  [ 0.00  0.00 ]
+Key: TLS_desc:  [ 0.00  0.00 ]
+Key: TMMULTF:  [ 0.00  0.00 ]
+Key: TPAUSE:  [ 0.00  0.00 ]
+Key: TRAP:  [ 0.00  0.00 ]
+Key: TST_F:  [ 0.00  0.00 ]
+Key: TST_Fp:  [ 0.00  0.00 ]
+Key: TTCMMIMFP:  [ 0.00  0.00 ]
+Key: TTCMMRLFP:  [ 0.00  0.00 ]
+Key: TTDPBF:  [ 0.00  0.00 ]
+Key: TTDPFP:  [ 0.00  0.00 ]
+Key: TTMMULTF:  [ 0.00  0.00 ]
+Key: TTRANSPOSED:  [ 0.00  0.00 ]
+Key: TZCNT:  [ 0.00  0.00 ]
+Key: TZMSK:  [ 0.00  0.00 ]
+Key: UBSAN_UD:  [ 0.00  0.00 ]
+Key: UCOMISDrm:  [ 0.00  0.00 ]
+Key: UCOMISDrm_Int:  [ 0.00  0.00 ]
+Key: UCOMISDrr:  [ 0.00  0.00 ]
+Key: UCOMISDrr_Int:  [ 0.00  0.00 ]
+Key: UCOMISSrm:  [ 0.00  0.00 ]
+Key: UCOMISSrm_Int:  [ 0.00  0.00 ]
+Key: UCOMISSrr:  [ 0.00  0.00 ]
+Key: UCOMISSrr_Int:  [ 0.00  0.00 ]
+Key: UCOM_FIPr:  [ 0.00  0.00 ]
+Key: UCOM_FIr:  [ 0.00  0.00 ]
+Key: UCOM_FPPr:  [ 0.00  0.00 ]
+Key: UCOM_FPr:  [ 0.00  0.00 ]
+Key: UCOM_FpIr:  [ 0.00  0.00 ]
+Key: UCOM_Fpr:  [ 0.00  0.00 ]
+Key: UCOM_Fr:  [ 0.00  0.00 ]
+Key: UD:  [ 0.00  0.00 ]
+Key: UIRET:  [ 0.00  0.00 ]
+Key: UMONITOR:  [ 0.00  0.00 ]
+Key: UMWAIT:  [ 0.00  0.00 ]
+Key: UNPCKHPDrm:  [ 0.00  0.00 ]
+Key: UNPCKHPDrr:  [ 0.00  0.00 ]
+Key: UNPCKHPSrm:  [ 0.00  0.00 ]
+Key: UNPCKHPSrr:  [ 0.00  0.00 ]
+Key: UNPCKLPDrm:  [ 0.00  0.00 ]
+Key: UNPCKLPDrr:  [ 0.00  0.00 ]
+Key: UNPCKLPSrm:  [ 0.00  0.00 ]
+Key: UNPCKLPSrr:  [ 0.00  0.00 ]
+Key: URDMSRri:  [ 0.00  0.00 ]
+Key: URDMSRri_EVEX:  [ 0.00  0.00 ]
+Key: URDMSRrr:  [ 0.00  0.00 ]
+Key: URDMSRrr_EVEX:  [ 0.00  0.00 ]
+Key: UWRMSRir:  [ 0.00  0.00 ]
+Key: UWRMSRir_EVEX:  [ 0.00  0.00 ]
+Key: UWRMSRrr:  [ 0.00  0.00 ]
+Key: UWRMSRrr_EVEX:  [ 0.00  0.00 ]
+Key: V:  [ 0.00  0.00 ]
+Key: VAARG:  [ 0.00  0.00 ]
+Key: VAARG_X:  [ 0.00  0.00 ]
+Key: VADDBF:  [ 0.00  0.00 ]
+Key: VADDPDYrm:  [ 0.00  0.00 ]
+Key: VADDPDYrr:  [ 0.00  0.00 ]
+Key: VADDPDZ:  [ 0.00  0.00 ]
+Key: VADDPDZrm:  [ 0.00  0.00 ]
+Key: VADDPDZrmb:  [ 0.00  0.00 ]
+Key: VADDPDZrmbk:  [ 0.00  0.00 ]
+Key: VADDPDZrmbkz:  [ 0.00  0.00 ]
+Key: VADDPDZrmk:  [ 0.00  0.00 ]
+Key: VADDPDZrmkz:  [ 0.00  0.00 ]
+Key: VADDPDZrr:  [ 0.00  0.00 ]
+Key: VADDPDZrrb:  [ 0.00  0.00 ]
+Key: VADDPDZrrbk:  [ 0.00  0.00 ]
+Key: VADDPDZrrbkz:  [ 0.00  0.00 ]
+Key: VADDPDZrrk:  [ 0.00  0.00 ]
+Key: VADDPDZrrkz:  [ 0.00  0.00 ]
+Key: VADDPDrm:  [ 0.00  0.00 ]
+Key: VADDPDrr:  [ 0.00  0.00 ]
+Key: VADDPHZ:  [ 0.00  0.00 ]
+Key: VADDPHZrm:  [ 0.00  0.00 ]
+Key: VADDPHZrmb:  [ 0.00  0.00 ]
+Key: VADDPHZrmbk:  [ 0.00  0.00 ]
+Key: VADDPHZrmbkz:  [ 0.00  0.00 ]
+Key: VADDPHZrmk:  [ 0.00  0.00 ]
+Key: VADDPHZrmkz:  [ 0.00  0.00 ]
+Key: VADDPHZrr:  [ 0.00  0.00 ]
+Key: VADDPHZrrb:  [ 0.00  0.00 ]
+Key: VADDPHZrrbk:  [ 0.00  0.00 ]
+Key: VADDPHZrrbkz:  [ 0.00  0.00 ]
+Key: VADDPHZrrk:  [ 0.00  0.00 ]
+Key: VADDPHZrrkz:  [ 0.00  0.00 ]
+Key: VADDPSYrm:  [ 0.00  0.00 ]
+Key: VADDPSYrr:  [ 0.00  0.00 ]
+Key: VADDPSZ:  [ 0.00  0.00 ]
+Key: VADDPSZrm:  [ 0.00  0.00 ]
+Key: VADDPSZrmb:  [ 0.00  0.00 ]
+Key: VADDPSZrmbk:  [ 0.00  0.00 ]
+Key: VADDPSZrmbkz:  [ 0.00  0.00 ]
+Key: VADDPSZrmk:  [ 0.00  0.00 ]
+Key: VADDPSZrmkz:  [ 0.00  0.00 ]
+Key: VADDPSZrr:  [ 0.00  0.00 ]
+Key: VADDPSZrrb:  [ 0.00  0.00 ]
+Key: VADDPSZrrbk:  [ 0.00  0.00 ]
+Key: VADDPSZrrbkz:  [ 0.00  0.00 ]
+Key: VADDPSZrrk:  [ 0.00  0.00 ]
+Key: VADDPSZrrkz:  [ 0.00  0.00 ]
+Key: VADDPSrm:  [ 0.00  0.00 ]
+Key: VADDPSrr:  [ 0.00  0.00 ]
+Key: VADDSDZrm:  [ 0.00  0.00 ]
+Key: VADDSDZrm_Int:  [ 0.00  0.00 ]
+Key: VADDSDZrmk_Int:  [ 0.00  0.00 ]
+Key: VADDSDZrmkz_Int:  [ 0.00  0.00 ]
+Key: VADDSDZrr:  [ 0.00  0.00 ]
+Key: VADDSDZrr_Int:  [ 0.00  0.00 ]
+Key: VADDSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VADDSDZrrbk_Int:  [ 0.00  0.00 ]
+Key: VADDSDZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VADDSDZrrk_Int:  [ 0.00  0.00 ]
+Key: VADDSDZrrkz_Int:  [ 0.00  0.00 ]
+Key: VADDSDrm:  [ 0.00  0.00 ]
+Key: VADDSDrm_Int:  [ 0.00  0.00 ]
+Key: VADDSDrr:  [ 0.00  0.00 ]
+Key: VADDSDrr_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrm:  [ 0.00  0.00 ]
+Key: VADDSHZrm_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrmk_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrmkz_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrr:  [ 0.00  0.00 ]
+Key: VADDSHZrr_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrrbk_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrrk_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrrkz_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrm:  [ 0.00  0.00 ]
+Key: VADDSSZrm_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrmk_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrmkz_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrr:  [ 0.00  0.00 ]
+Key: VADDSSZrr_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrrbk_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrrk_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrrkz_Int:  [ 0.00  0.00 ]
+Key: VADDSSrm:  [ 0.00  0.00 ]
+Key: VADDSSrm_Int:  [ 0.00  0.00 ]
+Key: VADDSSrr:  [ 0.00  0.00 ]
+Key: VADDSSrr_Int:  [ 0.00  0.00 ]
+Key: VADDSUBPDYrm:  [ 0.00  0.00 ]
+Key: VADDSUBPDYrr:  [ 0.00  0.00 ]
+Key: VADDSUBPDrm:  [ 0.00  0.00 ]
+Key: VADDSUBPDrr:  [ 0.00  0.00 ]
+Key: VADDSUBPSYrm:  [ 0.00  0.00 ]
+Key: VADDSUBPSYrr:  [ 0.00  0.00 ]
+Key: VADDSUBPSrm:  [ 0.00  0.00 ]
+Key: VADDSUBPSrr:  [ 0.00  0.00 ]
+Key: VAESDECLASTYrm:  [ 0.00  0.00 ]
+Key: VAESDECLASTYrr:  [ 0.00  0.00 ]
+Key: VAESDECLASTZ:  [ 0.00  0.00 ]
+Key: VAESDECLASTZrm:  [ 0.00  0.00 ]
+Key: VAESDECLASTZrr:  [ 0.00  0.00 ]
+Key: VAESDECLASTrm:  [ 0.00  0.00 ]
+Key: VAESDECLASTrr:  [ 0.00  0.00 ]
+Key: VAESDECYrm:  [ 0.00  0.00 ]
+Key: VAESDECYrr:  [ 0.00  0.00 ]
+Key: VAESDECZ:  [ 0.00  0.00 ]
+Key: VAESDECZrm:  [ 0.00  0.00 ]
+Key: VAESDECZrr:  [ 0.00  0.00 ]
+Key: VAESDECrm:  [ 0.00  0.00 ]
+Key: VAESDECrr:  [ 0.00  0.00 ]
+Key: VAESENCLASTYrm:  [ 0.00  0.00 ]
+Key: VAESENCLASTYrr:  [ 0.00  0.00 ]
+Key: VAESENCLASTZ:  [ 0.00  0.00 ]
+Key: VAESENCLASTZrm:  [ 0.00  0.00 ]
+Key: VAESENCLASTZrr:  [ 0.00  0.00 ]
+Key: VAESENCLASTrm:  [ 0.00  0.00 ]
+Key: VAESENCLASTrr:  [ 0.00  0.00 ]
+Key: VAESENCYrm:  [ 0.00  0.00 ]
+Key: VAESENCYrr:  [ 0.00  0.00 ]
+Key: VAESENCZ:  [ 0.00  0.00 ]
+Key: VAESENCZrm:  [ 0.00  0.00 ]
+Key: VAESENCZrr:  [ 0.00  0.00 ]
+Key: VAESENCrm:  [ 0.00  0.00 ]
+Key: VAESENCrr:  [ 0.00  0.00 ]
+Key: VAESIMCrm:  [ 0.00  0.00 ]
+Key: VAESIMCrr:  [ 0.00  0.00 ]
+Key: VAESKEYGENASSISTrmi:  [ 0.00  0.00 ]
+Key: VAESKEYGENASSISTrri:  [ 0.00  0.00 ]
+Key: VALIGNDZ:  [ 0.00  0.00 ]
+Key: VALIGNDZrmbi:  [ 0.00  0.00 ]
+Key: VALIGNDZrmbik:  [ 0.00  0.00 ]
+Key: VALIGNDZrmbikz:  [ 0.00  0.00 ]
+Key: VALIGNDZrmi:  [ 0.00  0.00 ]
+Key: VALIGNDZrmik:  [ 0.00  0.00 ]
+Key: VALIGNDZrmikz:  [ 0.00  0.00 ]
+Key: VALIGNDZrri:  [ 0.00  0.00 ]
+Key: VALIGNDZrrik:  [ 0.00  0.00 ]
+Key: VALIGNDZrrikz:  [ 0.00  0.00 ]
+Key: VALIGNQZ:  [ 0.00  0.00 ]
+Key: VALIGNQZrmbi:  [ 0.00  0.00 ]
+Key: VALIGNQZrmbik:  [ 0.00  0.00 ]
+Key: VALIGNQZrmbikz:  [ 0.00  0.00 ]
+Key: VALIGNQZrmi:  [ 0.00  0.00 ]
+Key: VALIGNQZrmik:  [ 0.00  0.00 ]
+Key: VALIGNQZrmikz:  [ 0.00  0.00 ]
+Key: VALIGNQZrri:  [ 0.00  0.00 ]
+Key: VALIGNQZrrik:  [ 0.00  0.00 ]
+Key: VALIGNQZrrikz:  [ 0.00  0.00 ]
+Key: VANDNPDYrm:  [ 0.00  0.00 ]
+Key: VANDNPDYrr:  [ 0.00  0.00 ]
+Key: VANDNPDZ:  [ 0.00  0.00 ]
+Key: VANDNPDZrm:  [ 0.00  0.00 ]
+Key: VANDNPDZrmb:  [ 0.00  0.00 ]
+Key: VANDNPDZrmbk:  [ 0.00  0.00 ]
+Key: VANDNPDZrmbkz:  [ 0.00  0.00 ]
+Key: VANDNPDZrmk:  [ 0.00  0.00 ]
+Key: VANDNPDZrmkz:  [ 0.00  0.00 ]
+Key: VANDNPDZrr:  [ 0.00  0.00 ]
+Key: VANDNPDZrrk:  [ 0.00  0.00 ]
+Key: VANDNPDZrrkz:  [ 0.00  0.00 ]
+Key: VANDNPDrm:  [ 0.00  0.00 ]
+Key: VANDNPDrr:  [ 0.00  0.00 ]
+Key: VANDNPSYrm:  [ 0.00  0.00 ]
+Key: VANDNPSYrr:  [ 0.00  0.00 ]
+Key: VANDNPSZ:  [ 0.00  0.00 ]
+Key: VANDNPSZrm:  [ 0.00  0.00 ]
+Key: VANDNPSZrmb:  [ 0.00  0.00 ]
+Key: VANDNPSZrmbk:  [ 0.00  0.00 ]
+Key: VANDNPSZrmbkz:  [ 0.00  0.00 ]
+Key: VANDNPSZrmk:  [ 0.00  0.00 ]
+Key: VANDNPSZrmkz:  [ 0.00  0.00 ]
+Key: VANDNPSZrr:  [ 0.00  0.00 ]
+Key: VANDNPSZrrk:  [ 0.00  0.00 ]
+Key: VANDNPSZrrkz:  [ 0.00  0.00 ]
+Key: VANDNPSrm:  [ 0.00  0.00 ]
+Key: VANDNPSrr:  [ 0.00  0.00 ]
+Key: VANDPDYrm:  [ 0.00  0.00 ]
+Key: VANDPDYrr:  [ 0.00  0.00 ]
+Key: VANDPDZ:  [ 0.00  0.00 ]
+Key: VANDPDZrm:  [ 0.00  0.00 ]
+Key: VANDPDZrmb:  [ 0.00  0.00 ]
+Key: VANDPDZrmbk:  [ 0.00  0.00 ]
+Key: VANDPDZrmbkz:  [ 0.00  0.00 ]
+Key: VANDPDZrmk:  [ 0.00  0.00 ]
+Key: VANDPDZrmkz:  [ 0.00  0.00 ]
+Key: VANDPDZrr:  [ 0.00  0.00 ]
+Key: VANDPDZrrk:  [ 0.00  0.00 ]
+Key: VANDPDZrrkz:  [ 0.00  0.00 ]
+Key: VANDPDrm:  [ 0.00  0.00 ]
+Key: VANDPDrr:  [ 0.00  0.00 ]
+Key: VANDPSYrm:  [ 0.00  0.00 ]
+Key: VANDPSYrr:  [ 0.00  0.00 ]
+Key: VANDPSZ:  [ 0.00  0.00 ]
+Key: VANDPSZrm:  [ 0.00  0.00 ]
+Key: VANDPSZrmb:  [ 0.00  0.00 ]
+Key: VANDPSZrmbk:  [ 0.00  0.00 ]
+Key: VANDPSZrmbkz:  [ 0.00  0.00 ]
+Key: VANDPSZrmk:  [ 0.00  0.00 ]
+Key: VANDPSZrmkz:  [ 0.00  0.00 ]
+Key: VANDPSZrr:  [ 0.00  0.00 ]
+Key: VANDPSZrrk:  [ 0.00  0.00 ]
+Key: VANDPSZrrkz:  [ 0.00  0.00 ]
+Key: VANDPSrm:  [ 0.00  0.00 ]
+Key: VANDPSrr:  [ 0.00  0.00 ]
+Key: VASTART_SAVE_XMM_REGS:  [ 0.00  0.00 ]
+Key: VBCSTNEBF:  [ 0.00  0.00 ]
+Key: VBCSTNESH:  [ 0.00  0.00 ]
+Key: VBLENDMPDZ:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrm:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrmb:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrmbk:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrmbkz:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrmk:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrmkz:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrr:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrrk:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrrkz:  [ 0.00  0.00 ]
+Key: VBLENDMPSZ:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrm:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrmb:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrmbk:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrmbkz:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrmk:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrmkz:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrr:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrrk:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrrkz:  [ 0.00  0.00 ]
+Key: VBLENDPDYrmi:  [ 0.00  0.00 ]
+Key: VBLENDPDYrri:  [ 0.00  0.00 ]
+Key: VBLENDPDrmi:  [ 0.00  0.00 ]
+Key: VBLENDPDrri:  [ 0.00  0.00 ]
+Key: VBLENDPSYrmi:  [ 0.00  0.00 ]
+Key: VBLENDPSYrri:  [ 0.00  0.00 ]
+Key: VBLENDPSrmi:  [ 0.00  0.00 ]
+Key: VBLENDPSrri:  [ 0.00  0.00 ]
+Key: VBLENDVPDYrmr:  [ 0.00  0.00 ]
+Key: VBLENDVPDYrrr:  [ 0.00  0.00 ]
+Key: VBLENDVPDrmr:  [ 0.00  0.00 ]
+Key: VBLENDVPDrrr:  [ 0.00  0.00 ]
+Key: VBLENDVPSYrmr:  [ 0.00  0.00 ]
+Key: VBLENDVPSYrrr:  [ 0.00  0.00 ]
+Key: VBLENDVPSrmr:  [ 0.00  0.00 ]
+Key: VBLENDVPSrrr:  [ 0.00  0.00 ]
+Key: VBROADCASTF:  [ 0.00  0.00 ]
+Key: VBROADCASTI:  [ 0.00  0.00 ]
+Key: VBROADCASTSDYrm:  [ 0.00  0.00 ]
+Key: VBROADCASTSDYrr:  [ 0.00  0.00 ]
+Key: VBROADCASTSDZ:  [ 0.00  0.00 ]
+Key: VBROADCASTSDZrm:  [ 0.00  0.00 ]
+Key: VBROADCASTSDZrmk:  [ 0.00  0.00 ]
+Key: VBROADCASTSDZrmkz:  [ 0.00  0.00 ]
+Key: VBROADCASTSDZrr:  [ 0.00  0.00 ]
+Key: VBROADCASTSDZrrk:  [ 0.00  0.00 ]
+Key: VBROADCASTSDZrrkz:  [ 0.00  0.00 ]
+Key: VBROADCASTSSYrm:  [ 0.00  0.00 ]
+Key: VBROADCASTSSYrr:  [ 0.00  0.00 ]
+Key: VBROADCASTSSZ:  [ 0.00  0.00 ]
+Key: VBROADCASTSSZrm:  [ 0.00  0.00 ]
+Key: VBROADCASTSSZrmk:  [ 0.00  0.00 ]
+Key: VBROADCASTSSZrmkz:  [ 0.00  0.00 ]
+Key: VBROADCASTSSZrr:  [ 0.00  0.00 ]
+Key: VBROADCASTSSZrrk:  [ 0.00  0.00 ]
+Key: VBROADCASTSSZrrkz:  [ 0.00  0.00 ]
+Key: VBROADCASTSSrm:  [ 0.00  0.00 ]
+Key: VBROADCASTSSrr:  [ 0.00  0.00 ]
+Key: VCMPBF:  [ 0.00  0.00 ]
+Key: VCMPPDYrmi:  [ 0.00  0.00 ]
+Key: VCMPPDYrri:  [ 0.00  0.00 ]
+Key: VCMPPDZ:  [ 0.00  0.00 ]
+Key: VCMPPDZrmbi:  [ 0.00  0.00 ]
+Key: VCMPPDZrmbik:  [ 0.00  0.00 ]
+Key: VCMPPDZrmi:  [ 0.00  0.00 ]
+Key: VCMPPDZrmik:  [ 0.00  0.00 ]
+Key: VCMPPDZrri:  [ 0.00  0.00 ]
+Key: VCMPPDZrrib:  [ 0.00  0.00 ]
+Key: VCMPPDZrribk:  [ 0.00  0.00 ]
+Key: VCMPPDZrrik:  [ 0.00  0.00 ]
+Key: VCMPPDrmi:  [ 0.00  0.00 ]
+Key: VCMPPDrri:  [ 0.00  0.00 ]
+Key: VCMPPHZ:  [ 0.00  0.00 ]
+Key: VCMPPHZrmbi:  [ 0.00  0.00 ]
+Key: VCMPPHZrmbik:  [ 0.00  0.00 ]
+Key: VCMPPHZrmi:  [ 0.00  0.00 ]
+Key: VCMPPHZrmik:  [ 0.00  0.00 ]
+Key: VCMPPHZrri:  [ 0.00  0.00 ]
+Key: VCMPPHZrrib:  [ 0.00  0.00 ]
+Key: VCMPPHZrribk:  [ 0.00  0.00 ]
+Key: VCMPPHZrrik:  [ 0.00  0.00 ]
+Key: VCMPPSYrmi:  [ 0.00  0.00 ]
+Key: VCMPPSYrri:  [ 0.00  0.00 ]
+Key: VCMPPSZ:  [ 0.00  0.00 ]
+Key: VCMPPSZrmbi:  [ 0.00  0.00 ]
+Key: VCMPPSZrmbik:  [ 0.00  0.00 ]
+Key: VCMPPSZrmi:  [ 0.00  0.00 ]
+Key: VCMPPSZrmik:  [ 0.00  0.00 ]
+Key: VCMPPSZrri:  [ 0.00  0.00 ]
+Key: VCMPPSZrrib:  [ 0.00  0.00 ]
+Key: VCMPPSZrribk:  [ 0.00  0.00 ]
+Key: VCMPPSZrrik:  [ 0.00  0.00 ]
+Key: VCMPPSrmi:  [ 0.00  0.00 ]
+Key: VCMPPSrri:  [ 0.00  0.00 ]
+Key: VCMPSDZrmi:  [ 0.00  0.00 ]
+Key: VCMPSDZrmi_Int:  [ 0.00  0.00 ]
+Key: VCMPSDZrmik_Int:  [ 0.00  0.00 ]
+Key: VCMPSDZrri:  [ 0.00  0.00 ]
+Key: VCMPSDZrri_Int:  [ 0.00  0.00 ]
+Key: VCMPSDZrrib_Int:  [ 0.00  0.00 ]
+Key: VCMPSDZrribk_Int:  [ 0.00  0.00 ]
+Key: VCMPSDZrrik_Int:  [ 0.00  0.00 ]
+Key: VCMPSDrmi:  [ 0.00  0.00 ]
+Key: VCMPSDrmi_Int:  [ 0.00  0.00 ]
+Key: VCMPSDrri:  [ 0.00  0.00 ]
+Key: VCMPSDrri_Int:  [ 0.00  0.00 ]
+Key: VCMPSHZrmi:  [ 0.00  0.00 ]
+Key: VCMPSHZrmi_Int:  [ 0.00  0.00 ]
+Key: VCMPSHZrmik_Int:  [ 0.00  0.00 ]
+Key: VCMPSHZrri:  [ 0.00  0.00 ]
+Key: VCMPSHZrri_Int:  [ 0.00  0.00 ]
+Key: VCMPSHZrrib_Int:  [ 0.00  0.00 ]
+Key: VCMPSHZrribk_Int:  [ 0.00  0.00 ]
+Key: VCMPSHZrrik_Int:  [ 0.00  0.00 ]
+Key: VCMPSSZrmi:  [ 0.00  0.00 ]
+Key: VCMPSSZrmi_Int:  [ 0.00  0.00 ]
+Key: VCMPSSZrmik_Int:  [ 0.00  0.00 ]
+Key: VCMPSSZrri:  [ 0.00  0.00 ]
+Key: VCMPSSZrri_Int:  [ 0.00  0.00 ]
+Key: VCMPSSZrrib_Int:  [ 0.00  0.00 ]
+Key: VCMPSSZrribk_Int:  [ 0.00  0.00 ]
+Key: VCMPSSZrrik_Int:  [ 0.00  0.00 ]
+Key: VCMPSSrmi:  [ 0.00  0.00 ]
+Key: VCMPSSrmi_Int:  [ 0.00  0.00 ]
+Key: VCMPSSrri:  [ 0.00  0.00 ]
+Key: VCMPSSrri_Int:  [ 0.00  0.00 ]
+Key: VCOMISBF:  [ 0.00  0.00 ]
+Key: VCOMISDZrm:  [ 0.00  0.00 ]
+Key: VCOMISDZrm_Int:  [ 0.00  0.00 ]
+Key: VCOMISDZrr:  [ 0.00  0.00 ]
+Key: VCOMISDZrr_Int:  [ 0.00  0.00 ]
+Key: VCOMISDZrrb:  [ 0.00  0.00 ]
+Key: VCOMISDrm:  [ 0.00  0.00 ]
+Key: VCOMISDrm_Int:  [ 0.00  0.00 ]
+Key: VCOMISDrr:  [ 0.00  0.00 ]
+Key: VCOMISDrr_Int:  [ 0.00  0.00 ]
+Key: VCOMISHZrm:  [ 0.00  0.00 ]
+Key: VCOMISHZrm_Int:  [ 0.00  0.00 ]
+Key: VCOMISHZrr:  [ 0.00  0.00 ]
+Key: VCOMISHZrr_Int:  [ 0.00  0.00 ]
+Key: VCOMISHZrrb:  [ 0.00  0.00 ]
+Key: VCOMISSZrm:  [ 0.00  0.00 ]
+Key: VCOMISSZrm_Int:  [ 0.00  0.00 ]
+Key: VCOMISSZrr:  [ 0.00  0.00 ]
+Key: VCOMISSZrr_Int:  [ 0.00  0.00 ]
+Key: VCOMISSZrrb:  [ 0.00  0.00 ]
+Key: VCOMISSrm:  [ 0.00  0.00 ]
+Key: VCOMISSrm_Int:  [ 0.00  0.00 ]
+Key: VCOMISSrr:  [ 0.00  0.00 ]
+Key: VCOMISSrr_Int:  [ 0.00  0.00 ]
+Key: VCOMPRESSPDZ:  [ 0.00  0.00 ]
+Key: VCOMPRESSPDZmr:  [ 0.00  0.00 ]
+Key: VCOMPRESSPDZmrk:  [ 0.00  0.00 ]
+Key: VCOMPRESSPDZrr:  [ 0.00  0.00 ]
+Key: VCOMPRESSPDZrrk:  [ 0.00  0.00 ]
+Key: VCOMPRESSPDZrrkz:  [ 0.00  0.00 ]
+Key: VCOMPRESSPSZ:  [ 0.00  0.00 ]
+Key: VCOMPRESSPSZmr:  [ 0.00  0.00 ]
+Key: VCOMPRESSPSZmrk:  [ 0.00  0.00 ]
+Key: VCOMPRESSPSZrr:  [ 0.00  0.00 ]
+Key: VCOMPRESSPSZrrk:  [ 0.00  0.00 ]
+Key: VCOMPRESSPSZrrkz:  [ 0.00  0.00 ]
+Key: VCOMXSDZrm_Int:  [ 0.00  0.00 ]
+Key: VCOMXSDZrr_Int:  [ 0.00  0.00 ]
+Key: VCOMXSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VCOMXSHZrm_Int:  [ 0.00  0.00 ]
+Key: VCOMXSHZrr_Int:  [ 0.00  0.00 ]
+Key: VCOMXSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VCOMXSSZrm_Int:  [ 0.00  0.00 ]
+Key: VCOMXSSZrr_Int:  [ 0.00  0.00 ]
+Key: VCOMXSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VCVT:  [ 0.00  0.00 ]
+Key: VCVTBF:  [ 0.00  0.00 ]
+Key: VCVTBIASPH:  [ 0.00  0.00 ]
+Key: VCVTDQ:  [ 0.00  0.00 ]
+Key: VCVTHF:  [ 0.00  0.00 ]
+Key: VCVTNE:  [ 0.00  0.00 ]
+Key: VCVTNEEBF:  [ 0.00  0.00 ]
+Key: VCVTNEEPH:  [ 0.00  0.00 ]
+Key: VCVTNEOBF:  [ 0.00  0.00 ]
+Key: VCVTNEOPH:  [ 0.00  0.00 ]
+Key: VCVTNEPS:  [ 0.00  0.00 ]
+Key: VCVTPD:  [ 0.00  0.00 ]
+Key: VCVTPH:  [ 0.00  0.00 ]
+Key: VCVTPS:  [ 0.00  0.00 ]
+Key: VCVTQQ:  [ 0.00  0.00 ]
+Key: VCVTSD:  [ 0.00  0.00 ]
+Key: VCVTSH:  [ 0.00  0.00 ]
+Key: VCVTSI:  [ 0.00  0.00 ]
+Key: VCVTSS:  [ 0.00  0.00 ]
+Key: VCVTTBF:  [ 0.00  0.00 ]
+Key: VCVTTPD:  [ 0.00  0.00 ]
+Key: VCVTTPH:  [ 0.00  0.00 ]
+Key: VCVTTPS:  [ 0.00  0.00 ]
+Key: VCVTTSD:  [ 0.00  0.00 ]
+Key: VCVTTSH:  [ 0.00  0.00 ]
+Key: VCVTTSS:  [ 0.00  0.00 ]
+Key: VCVTUDQ:  [ 0.00  0.00 ]
+Key: VCVTUQQ:  [ 0.00  0.00 ]
+Key: VCVTUSI:  [ 0.00  0.00 ]
+Key: VCVTUW:  [ 0.00  0.00 ]
+Key: VCVTW:  [ 0.00  0.00 ]
+Key: VDBPSADBWZ:  [ 0.00  0.00 ]
+Key: VDBPSADBWZrmi:  [ 0.00  0.00 ]
+Key: VDBPSADBWZrmik:  [ 0.00  0.00 ]
+Key: VDBPSADBWZrmikz:  [ 0.00  0.00 ]
+Key: VDBPSADBWZrri:  [ 0.00  0.00 ]
+Key: VDBPSADBWZrrik:  [ 0.00  0.00 ]
+Key: VDBPSADBWZrrikz:  [ 0.00  0.00 ]
+Key: VDIVBF:  [ 0.00  0.00 ]
+Key: VDIVPDYrm:  [ 0.00  0.00 ]
+Key: VDIVPDYrr:  [ 0.00  0.00 ]
+Key: VDIVPDZ:  [ 0.00  0.00 ]
+Key: VDIVPDZrm:  [ 0.00  0.00 ]
+Key: VDIVPDZrmb:  [ 0.00  0.00 ]
+Key: VDIVPDZrmbk:  [ 0.00  0.00 ]
+Key: VDIVPDZrmbkz:  [ 0.00  0.00 ]
+Key: VDIVPDZrmk:  [ 0.00  0.00 ]
+Key: VDIVPDZrmkz:  [ 0.00  0.00 ]
+Key: VDIVPDZrr:  [ 0.00  0.00 ]
+Key: VDIVPDZrrb:  [ 0.00  0.00 ]
+Key: VDIVPDZrrbk:  [ 0.00  0.00 ]
+Key: VDIVPDZrrbkz:  [ 0.00  0.00 ]
+Key: VDIVPDZrrk:  [ 0.00  0.00 ]
+Key: VDIVPDZrrkz:  [ 0.00  0.00 ]
+Key: VDIVPDrm:  [ 0.00  0.00 ]
+Key: VDIVPDrr:  [ 0.00  0.00 ]
+Key: VDIVPHZ:  [ 0.00  0.00 ]
+Key: VDIVPHZrm:  [ 0.00  0.00 ]
+Key: VDIVPHZrmb:  [ 0.00  0.00 ]
+Key: VDIVPHZrmbk:  [ 0.00  0.00 ]
+Key: VDIVPHZrmbkz:  [ 0.00  0.00 ]
+Key: VDIVPHZrmk:  [ 0.00  0.00 ]
+Key: VDIVPHZrmkz:  [ 0.00  0.00 ]
+Key: VDIVPHZrr:  [ 0.00  0.00 ]
+Key: VDIVPHZrrb:  [ 0.00  0.00 ]
+Key: VDIVPHZrrbk:  [ 0.00  0.00 ]
+Key: VDIVPHZrrbkz:  [ 0.00  0.00 ]
+Key: VDIVPHZrrk:  [ 0.00  0.00 ]
+Key: VDIVPHZrrkz:  [ 0.00  0.00 ]
+Key: VDIVPSYrm:  [ 0.00  0.00 ]
+Key: VDIVPSYrr:  [ 0.00  0.00 ]
+Key: VDIVPSZ:  [ 0.00  0.00 ]
+Key: VDIVPSZrm:  [ 0.00  0.00 ]
+Key: VDIVPSZrmb:  [ 0.00  0.00 ]
+Key: VDIVPSZrmbk:  [ 0.00  0.00 ]
+Key: VDIVPSZrmbkz:  [ 0.00  0.00 ]
+Key: VDIVPSZrmk:  [ 0.00  0.00 ]
+Key: VDIVPSZrmkz:  [ 0.00  0.00 ]
+Key: VDIVPSZrr:  [ 0.00  0.00 ]
+Key: VDIVPSZrrb:  [ 0.00  0.00 ]
+Key: VDIVPSZrrbk:  [ 0.00  0.00 ]
+Key: VDIVPSZrrbkz:  [ 0.00  0.00 ]
+Key: VDIVPSZrrk:  [ 0.00  0.00 ]
+Key: VDIVPSZrrkz:  [ 0.00  0.00 ]
+Key: VDIVPSrm:  [ 0.00  0.00 ]
+Key: VDIVPSrr:  [ 0.00  0.00 ]
+Key: VDIVSDZrm:  [ 0.00  0.00 ]
+Key: VDIVSDZrm_Int:  [ 0.00  0.00 ]
+Key: VDIVSDZrmk_Int:  [ 0.00  0.00 ]
+Key: VDIVSDZrmkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSDZrr:  [ 0.00  0.00 ]
+Key: VDIVSDZrr_Int:  [ 0.00  0.00 ]
+Key: VDIVSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VDIVSDZrrbk_Int:  [ 0.00  0.00 ]
+Key: VDIVSDZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSDZrrk_Int:  [ 0.00  0.00 ]
+Key: VDIVSDZrrkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSDrm:  [ 0.00  0.00 ]
+Key: VDIVSDrm_Int:  [ 0.00  0.00 ]
+Key: VDIVSDrr:  [ 0.00  0.00 ]
+Key: VDIVSDrr_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrm:  [ 0.00  0.00 ]
+Key: VDIVSHZrm_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrmk_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrmkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrr:  [ 0.00  0.00 ]
+Key: VDIVSHZrr_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrrbk_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrrk_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrrkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrm:  [ 0.00  0.00 ]
+Key: VDIVSSZrm_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrmk_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrmkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrr:  [ 0.00  0.00 ]
+Key: VDIVSSZrr_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrrbk_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrrk_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrrkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSSrm:  [ 0.00  0.00 ]
+Key: VDIVSSrm_Int:  [ 0.00  0.00 ]
+Key: VDIVSSrr:  [ 0.00  0.00 ]
+Key: VDIVSSrr_Int:  [ 0.00  0.00 ]
+Key: VDPBF:  [ 0.00  0.00 ]
+Key: VDPPDrmi:  [ 0.00  0.00 ]
+Key: VDPPDrri:  [ 0.00  0.00 ]
+Key: VDPPHPSZ:  [ 0.00  0.00 ]
+Key: VDPPHPSZm:  [ 0.00  0.00 ]
+Key: VDPPHPSZmb:  [ 0.00  0.00 ]
+Key: VDPPHPSZmbk:  [ 0.00  0.00 ]
+Key: VDPPHPSZmbkz:  [ 0.00  0.00 ]
+Key: VDPPHPSZmk:  [ 0.00  0.00 ]
+Key: VDPPHPSZmkz:  [ 0.00  0.00 ]
+Key: VDPPHPSZr:  [ 0.00  0.00 ]
+Key: VDPPHPSZrk:  [ 0.00  0.00 ]
+Key: VDPPHPSZrkz:  [ 0.00  0.00 ]
+Key: VDPPSYrmi:  [ 0.00  0.00 ]
+Key: VDPPSYrri:  [ 0.00  0.00 ]
+Key: VDPPSrmi:  [ 0.00  0.00 ]
+Key: VDPPSrri:  [ 0.00  0.00 ]
+Key: VERRm:  [ 0.00  0.00 ]
+Key: VERRr:  [ 0.00  0.00 ]
+Key: VERWm:  [ 0.00  0.00 ]
+Key: VERWr:  [ 0.00  0.00 ]
+Key: VEXP:  [ 0.00  0.00 ]
+Key: VEXPANDPDZ:  [ 0.00  0.00 ]
+Key: VEXPANDPDZrm:  [ 0.00  0.00 ]
+Key: VEXPANDPDZrmk:  [ 0.00  0.00 ]
+Key: VEXPANDPDZrmkz:  [ 0.00  0.00 ]
+Key: VEXPANDPDZrr:  [ 0.00  0.00 ]
+Key: VEXPANDPDZrrk:  [ 0.00  0.00 ]
+Key: VEXPANDPDZrrkz:  [ 0.00  0.00 ]
+Key: VEXPANDPSZ:  [ 0.00  0.00 ]
+Key: VEXPANDPSZrm:  [ 0.00  0.00 ]
+Key: VEXPANDPSZrmk:  [ 0.00  0.00 ]
+Key: VEXPANDPSZrmkz:  [ 0.00  0.00 ]
+Key: VEXPANDPSZrr:  [ 0.00  0.00 ]
+Key: VEXPANDPSZrrk:  [ 0.00  0.00 ]
+Key: VEXPANDPSZrrkz:  [ 0.00  0.00 ]
+Key: VEXTRACTF:  [ 0.00  0.00 ]
+Key: VEXTRACTI:  [ 0.00  0.00 ]
+Key: VEXTRACTPSZmri:  [ 0.00  0.00 ]
+Key: VEXTRACTPSZrri:  [ 0.00  0.00 ]
+Key: VEXTRACTPSmri:  [ 0.00  0.00 ]
+Key: VEXTRACTPSrri:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZ:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZm:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZmb:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZmbk:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZmbkz:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZmk:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZmkz:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZr:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZrb:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZrbk:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZrbkz:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZrk:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZrkz:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZm:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZmk:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZmkz:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZr:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZrb:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZrbk:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZrbkz:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZrk:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZrkz:  [ 0.00  0.00 ]
+Key: VFCMULCPHZ:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrm:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrmb:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrmbk:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrmbkz:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrmk:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrmkz:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrr:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrrb:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrrbk:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrrbkz:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrrk:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrrkz:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrm:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrmk:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrmkz:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrr:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrrb:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrrbk:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrrbkz:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrrk:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrrkz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZ:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrmbi:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrmbik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrmbikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrmi:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrmik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrmikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrri:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrrib:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrribk:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrribkz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrrik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrrikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZ:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrmbi:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrmbik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrmbikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrmi:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrmik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrmikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrri:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrrib:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrribk:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrribkz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrrik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrrikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrmi:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrmik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrmikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrri:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrrib:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrribk:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrribkz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrrik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrrikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrmi:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrmik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrmikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrri:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrrib:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrribk:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrribkz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrrik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrrikz:  [ 0.00  0.00 ]
+Key: VFMADD:  [ 0.00  0.00 ]
+Key: VFMADDCPHZ:  [ 0.00  0.00 ]
+Key: VFMADDCPHZm:  [ 0.00  0.00 ]
+Key: VFMADDCPHZmb:  [ 0.00  0.00 ]
+Key: VFMADDCPHZmbk:  [ 0.00  0.00 ]
+Key: VFMADDCPHZmbkz:  [ 0.00  0.00 ]
+Key: VFMADDCPHZmk:  [ 0.00  0.00 ]
+Key: VFMADDCPHZmkz:  [ 0.00  0.00 ]
+Key: VFMADDCPHZr:  [ 0.00  0.00 ]
+Key: VFMADDCPHZrb:  [ 0.00  0.00 ]
+Key: VFMADDCPHZrbk:  [ 0.00  0.00 ]
+Key: VFMADDCPHZrbkz:  [ 0.00  0.00 ]
+Key: VFMADDCPHZrk:  [ 0.00  0.00 ]
+Key: VFMADDCPHZrkz:  [ 0.00  0.00 ]
+Key: VFMADDCSHZm:  [ 0.00  0.00 ]
+Key: VFMADDCSHZmk:  [ 0.00  0.00 ]
+Key: VFMADDCSHZmkz:  [ 0.00  0.00 ]
+Key: VFMADDCSHZr:  [ 0.00  0.00 ]
+Key: VFMADDCSHZrb:  [ 0.00  0.00 ]
+Key: VFMADDCSHZrbk:  [ 0.00  0.00 ]
+Key: VFMADDCSHZrbkz:  [ 0.00  0.00 ]
+Key: VFMADDCSHZrk:  [ 0.00  0.00 ]
+Key: VFMADDCSHZrkz:  [ 0.00  0.00 ]
+Key: VFMADDPD:  [ 0.00  0.00 ]
+Key: VFMADDPS:  [ 0.00  0.00 ]
+Key: VFMADDSD:  [ 0.00  0.00 ]
+Key: VFMADDSS:  [ 0.00  0.00 ]
+Key: VFMADDSUB:  [ 0.00  0.00 ]
+Key: VFMADDSUBPD:  [ 0.00  0.00 ]
+Key: VFMADDSUBPS:  [ 0.00  0.00 ]
+Key: VFMSUB:  [ 0.00  0.00 ]
+Key: VFMSUBADD:  [ 0.00  0.00 ]
+Key: VFMSUBADDPD:  [ 0.00  0.00 ]
+Key: VFMSUBADDPS:  [ 0.00  0.00 ]
+Key: VFMSUBPD:  [ 0.00  0.00 ]
+Key: VFMSUBPS:  [ 0.00  0.00 ]
+Key: VFMSUBSD:  [ 0.00  0.00 ]
+Key: VFMSUBSS:  [ 0.00  0.00 ]
+Key: VFMULCPHZ:  [ 0.00  0.00 ]
+Key: VFMULCPHZrm:  [ 0.00  0.00 ]
+Key: VFMULCPHZrmb:  [ 0.00  0.00 ]
+Key: VFMULCPHZrmbk:  [ 0.00  0.00 ]
+Key: VFMULCPHZrmbkz:  [ 0.00  0.00 ]
+Key: VFMULCPHZrmk:  [ 0.00  0.00 ]
+Key: VFMULCPHZrmkz:  [ 0.00  0.00 ]
+Key: VFMULCPHZrr:  [ 0.00  0.00 ]
+Key: VFMULCPHZrrb:  [ 0.00  0.00 ]
+Key: VFMULCPHZrrbk:  [ 0.00  0.00 ]
+Key: VFMULCPHZrrbkz:  [ 0.00  0.00 ]
+Key: VFMULCPHZrrk:  [ 0.00  0.00 ]
+Key: VFMULCPHZrrkz:  [ 0.00  0.00 ]
+Key: VFMULCSHZrm:  [ 0.00  0.00 ]
+Key: VFMULCSHZrmk:  [ 0.00  0.00 ]
+Key: VFMULCSHZrmkz:  [ 0.00  0.00 ]
+Key: VFMULCSHZrr:  [ 0.00  0.00 ]
+Key: VFMULCSHZrrb:  [ 0.00  0.00 ]
+Key: VFMULCSHZrrbk:  [ 0.00  0.00 ]
+Key: VFMULCSHZrrbkz:  [ 0.00  0.00 ]
+Key: VFMULCSHZrrk:  [ 0.00  0.00 ]
+Key: VFMULCSHZrrkz:  [ 0.00  0.00 ]
+Key: VFNMADD:  [ 0.00  0.00 ]
+Key: VFNMADDPD:  [ 0.00  0.00 ]
+Key: VFNMADDPS:  [ 0.00  0.00 ]
+Key: VFNMADDSD:  [ 0.00  0.00 ]
+Key: VFNMADDSS:  [ 0.00  0.00 ]
+Key: VFNMSUB:  [ 0.00  0.00 ]
+Key: VFNMSUBPD:  [ 0.00  0.00 ]
+Key: VFNMSUBPS:  [ 0.00  0.00 ]
+Key: VFNMSUBSD:  [ 0.00  0.00 ]
+Key: VFNMSUBSS:  [ 0.00  0.00 ]
+Key: VFPCLASSBF:  [ 0.00  0.00 ]
+Key: VFPCLASSPDZ:  [ 0.00  0.00 ]
+Key: VFPCLASSPDZmbi:  [ 0.00  0.00 ]
+Key: VFPCLASSPDZmbik:  [ 0.00  0.00 ]
+Key: VFPCLASSPDZmi:  [ 0.00  0.00 ]
+Key: VFPCLASSPDZmik:  [ 0.00  0.00 ]
+Key: VFPCLASSPDZri:  [ 0.00  0.00 ]
+Key: VFPCLASSPDZrik:  [ 0.00  0.00 ]
+Key: VFPCLASSPHZ:  [ 0.00  0.00 ]
+Key: VFPCLASSPHZmbi:  [ 0.00  0.00 ]
+Key: VFPCLASSPHZmbik:  [ 0.00  0.00 ]
+Key: VFPCLASSPHZmi:  [ 0.00  0.00 ]
+Key: VFPCLASSPHZmik:  [ 0.00  0.00 ]
+Key: VFPCLASSPHZri:  [ 0.00  0.00 ]
+Key: VFPCLASSPHZrik:  [ 0.00  0.00 ]
+Key: VFPCLASSPSZ:  [ 0.00  0.00 ]
+Key: VFPCLASSPSZmbi:  [ 0.00  0.00 ]
+Key: VFPCLASSPSZmbik:  [ 0.00  0.00 ]
+Key: VFPCLASSPSZmi:  [ 0.00  0.00 ]
+Key: VFPCLASSPSZmik:  [ 0.00  0.00 ]
+Key: VFPCLASSPSZri:  [ 0.00  0.00 ]
+Key: VFPCLASSPSZrik:  [ 0.00  0.00 ]
+Key: VFPCLASSSDZmi:  [ 0.00  0.00 ]
+Key: VFPCLASSSDZmik:  [ 0.00  0.00 ]
+Key: VFPCLASSSDZri:  [ 0.00  0.00 ]
+Key: VFPCLASSSDZrik:  [ 0.00  0.00 ]
+Key: VFPCLASSSHZmi:  [ 0.00  0.00 ]
+Key: VFPCLASSSHZmik:  [ 0.00  0.00 ]
+Key: VFPCLASSSHZri:  [ 0.00  0.00 ]
+Key: VFPCLASSSHZrik:  [ 0.00  0.00 ]
+Key: VFPCLASSSSZmi:  [ 0.00  0.00 ]
+Key: VFPCLASSSSZmik:  [ 0.00  0.00 ]
+Key: VFPCLASSSSZri:  [ 0.00  0.00 ]
+Key: VFPCLASSSSZrik:  [ 0.00  0.00 ]
+Key: VFRCZPDYrm:  [ 0.00  0.00 ]
+Key: VFRCZPDYrr:  [ 0.00  0.00 ]
+Key: VFRCZPDrm:  [ 0.00  0.00 ]
+Key: VFRCZPDrr:  [ 0.00  0.00 ]
+Key: VFRCZPSYrm:  [ 0.00  0.00 ]
+Key: VFRCZPSYrr:  [ 0.00  0.00 ]
+Key: VFRCZPSrm:  [ 0.00  0.00 ]
+Key: VFRCZPSrr:  [ 0.00  0.00 ]
+Key: VFRCZSDrm:  [ 0.00  0.00 ]
+Key: VFRCZSDrr:  [ 0.00  0.00 ]
+Key: VFRCZSSrm:  [ 0.00  0.00 ]
+Key: VFRCZSSrr:  [ 0.00  0.00 ]
+Key: VGATHERDPDYrm:  [ 0.00  0.00 ]
+Key: VGATHERDPDZ:  [ 0.00  0.00 ]
+Key: VGATHERDPDZrm:  [ 0.00  0.00 ]
+Key: VGATHERDPDrm:  [ 0.00  0.00 ]
+Key: VGATHERDPSYrm:  [ 0.00  0.00 ]
+Key: VGATHERDPSZ:  [ 0.00  0.00 ]
+Key: VGATHERDPSZrm:  [ 0.00  0.00 ]
+Key: VGATHERDPSrm:  [ 0.00  0.00 ]
+Key: VGATHERPF:  [ 0.00  0.00 ]
+Key: VGATHERQPDYrm:  [ 0.00  0.00 ]
+Key: VGATHERQPDZ:  [ 0.00  0.00 ]
+Key: VGATHERQPDZrm:  [ 0.00  0.00 ]
+Key: VGATHERQPDrm:  [ 0.00  0.00 ]
+Key: VGATHERQPSYrm:  [ 0.00  0.00 ]
+Key: VGATHERQPSZ:  [ 0.00  0.00 ]
+Key: VGATHERQPSZrm:  [ 0.00  0.00 ]
+Key: VGATHERQPSrm:  [ 0.00  0.00 ]
+Key: VGETEXPBF:  [ 0.00  0.00 ]
+Key: VGETEXPPDZ:  [ 0.00  0.00 ]
+Key: VGETEXPPDZm:  [ 0.00  0.00 ]
+Key: VGETEXPPDZmb:  [ 0.00  0.00 ]
+Key: VGETEXPPDZmbk:  [ 0.00  0.00 ]
+Key: VGETEXPPDZmbkz:  [ 0.00  0.00 ]
+Key: VGETEXPPDZmk:  [ 0.00  0.00 ]
+Key: VGETEXPPDZmkz:  [ 0.00  0.00 ]
+Key: VGETEXPPDZr:  [ 0.00  0.00 ]
+Key: VGETEXPPDZrb:  [ 0.00  0.00 ]
+Key: VGETEXPPDZrbk:  [ 0.00  0.00 ]
+Key: VGETEXPPDZrbkz:  [ 0.00  0.00 ]
+Key: VGETEXPPDZrk:  [ 0.00  0.00 ]
+Key: VGETEXPPDZrkz:  [ 0.00  0.00 ]
+Key: VGETEXPPHZ:  [ 0.00  0.00 ]
+Key: VGETEXPPHZm:  [ 0.00  0.00 ]
+Key: VGETEXPPHZmb:  [ 0.00  0.00 ]
+Key: VGETEXPPHZmbk:  [ 0.00  0.00 ]
+Key: VGETEXPPHZmbkz:  [ 0.00  0.00 ]
+Key: VGETEXPPHZmk:  [ 0.00  0.00 ]
+Key: VGETEXPPHZmkz:  [ 0.00  0.00 ]
+Key: VGETEXPPHZr:  [ 0.00  0.00 ]
+Key: VGETEXPPHZrb:  [ 0.00  0.00 ]
+Key: VGETEXPPHZrbk:  [ 0.00  0.00 ]
+Key: VGETEXPPHZrbkz:  [ 0.00  0.00 ]
+Key: VGETEXPPHZrk:  [ 0.00  0.00 ]
+Key: VGETEXPPHZrkz:  [ 0.00  0.00 ]
+Key: VGETEXPPSZ:  [ 0.00  0.00 ]
+Key: VGETEXPPSZm:  [ 0.00  0.00 ]
+Key: VGETEXPPSZmb:  [ 0.00  0.00 ]
+Key: VGETEXPPSZmbk:  [ 0.00  0.00 ]
+Key: VGETEXPPSZmbkz:  [ 0.00  0.00 ]
+Key: VGETEXPPSZmk:  [ 0.00  0.00 ]
+Key: VGETEXPPSZmkz:  [ 0.00  0.00 ]
+Key: VGETEXPPSZr:  [ 0.00  0.00 ]
+Key: VGETEXPPSZrb:  [ 0.00  0.00 ]
+Key: VGETEXPPSZrbk:  [ 0.00  0.00 ]
+Key: VGETEXPPSZrbkz:  [ 0.00  0.00 ]
+Key: VGETEXPPSZrk:  [ 0.00  0.00 ]
+Key: VGETEXPPSZrkz:  [ 0.00  0.00 ]
+Key: VGETEXPSDZm:  [ 0.00  0.00 ]
+Key: VGETEXPSDZmk:  [ 0.00  0.00 ]
+Key: VGETEXPSDZmkz:  [ 0.00  0.00 ]
+Key: VGETEXPSDZr:  [ 0.00  0.00 ]
+Key: VGETEXPSDZrb:  [ 0.00  0.00 ]
+Key: VGETEXPSDZrbk:  [ 0.00  0.00 ]
+Key: VGETEXPSDZrbkz:  [ 0.00  0.00 ]
+Key: VGETEXPSDZrk:  [ 0.00  0.00 ]
+Key: VGETEXPSDZrkz:  [ 0.00  0.00 ]
+Key: VGETEXPSHZm:  [ 0.00  0.00 ]
+Key: VGETEXPSHZmk:  [ 0.00  0.00 ]
+Key: VGETEXPSHZmkz:  [ 0.00  0.00 ]
+Key: VGETEXPSHZr:  [ 0.00  0.00 ]
+Key: VGETEXPSHZrb:  [ 0.00  0.00 ]
+Key: VGETEXPSHZrbk:  [ 0.00  0.00 ]
+Key: VGETEXPSHZrbkz:  [ 0.00  0.00 ]
+Key: VGETEXPSHZrk:  [ 0.00  0.00 ]
+Key: VGETEXPSHZrkz:  [ 0.00  0.00 ]
+Key: VGETEXPSSZm:  [ 0.00  0.00 ]
+Key: VGETEXPSSZmk:  [ 0.00  0.00 ]
+Key: VGETEXPSSZmkz:  [ 0.00  0.00 ]
+Key: VGETEXPSSZr:  [ 0.00  0.00 ]
+Key: VGETEXPSSZrb:  [ 0.00  0.00 ]
+Key: VGETEXPSSZrbk:  [ 0.00  0.00 ]
+Key: VGETEXPSSZrbkz:  [ 0.00  0.00 ]
+Key: VGETEXPSSZrk:  [ 0.00  0.00 ]
+Key: VGETEXPSSZrkz:  [ 0.00  0.00 ]
+Key: VGETMANTBF:  [ 0.00  0.00 ]
+Key: VGETMANTPDZ:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrmbi:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrmbik:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrmbikz:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrmi:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrmik:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrmikz:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrri:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrrib:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrribk:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrribkz:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrrik:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrrikz:  [ 0.00  0.00 ]
+Key: VGETMANTPHZ:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrmbi:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrmbik:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrmbikz:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrmi:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrmik:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrmikz:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrri:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrrib:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrribk:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrribkz:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrrik:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrrikz:  [ 0.00  0.00 ]
+Key: VGETMANTPSZ:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrmbi:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrmbik:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrmbikz:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrmi:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrmik:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrmikz:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrri:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrrib:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrribk:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrribkz:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrrik:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrrikz:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrmi:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrmik:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrmikz:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrri:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrrib:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrribk:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrribkz:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrrik:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrrikz:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrmi:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrmik:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrmikz:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrri:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrrib:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrribk:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrribkz:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrrik:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrrikz:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrmi:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrmik:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrmikz:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrri:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrrib:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrribk:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrribkz:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrrik:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrrikz:  [ 0.00  0.00 ]
+Key: VGF:  [ 0.00  0.00 ]
+Key: VHADDPDYrm:  [ 0.00  0.00 ]
+Key: VHADDPDYrr:  [ 0.00  0.00 ]
+Key: VHADDPDrm:  [ 0.00  0.00 ]
+Key: VHADDPDrr:  [ 0.00  0.00 ]
+Key: VHADDPSYrm:  [ 0.00  0.00 ]
+Key: VHADDPSYrr:  [ 0.00  0.00 ]
+Key: VHADDPSrm:  [ 0.00  0.00 ]
+Key: VHADDPSrr:  [ 0.00  0.00 ]
+Key: VHSUBPDYrm:  [ 0.00  0.00 ]
+Key: VHSUBPDYrr:  [ 0.00  0.00 ]
+Key: VHSUBPDrm:  [ 0.00  0.00 ]
+Key: VHSUBPDrr:  [ 0.00  0.00 ]
+Key: VHSUBPSYrm:  [ 0.00  0.00 ]
+Key: VHSUBPSYrr:  [ 0.00  0.00 ]
+Key: VHSUBPSrm:  [ 0.00  0.00 ]
+Key: VHSUBPSrr:  [ 0.00  0.00 ]
+Key: VINSERTF:  [ 0.00  0.00 ]
+Key: VINSERTI:  [ 0.00  0.00 ]
+Key: VINSERTPSZrmi:  [ 0.00  0.00 ]
+Key: VINSERTPSZrri:  [ 0.00  0.00 ]
+Key: VINSERTPSrmi:  [ 0.00  0.00 ]
+Key: VINSERTPSrri:  [ 0.00  0.00 ]
+Key: VLDDQUYrm:  [ 0.00  0.00 ]
+Key: VLDDQUrm:  [ 0.00  0.00 ]
+Key: VLDMXCSR:  [ 0.00  0.00 ]
+Key: VMASKMOVDQU:  [ 0.00  0.00 ]
+Key: VMASKMOVPDYmr:  [ 0.00  0.00 ]
+Key: VMASKMOVPDYrm:  [ 0.00  0.00 ]
+Key: VMASKMOVPDmr:  [ 0.00  0.00 ]
+Key: VMASKMOVPDrm:  [ 0.00  0.00 ]
+Key: VMASKMOVPSYmr:  [ 0.00  0.00 ]
+Key: VMASKMOVPSYrm:  [ 0.00  0.00 ]
+Key: VMASKMOVPSmr:  [ 0.00  0.00 ]
+Key: VMASKMOVPSrm:  [ 0.00  0.00 ]
+Key: VMAXBF:  [ 0.00  0.00 ]
+Key: VMAXCPDYrm:  [ 0.00  0.00 ]
+Key: VMAXCPDYrr:  [ 0.00  0.00 ]
+Key: VMAXCPDZ:  [ 0.00  0.00 ]
+Key: VMAXCPDZrm:  [ 0.00  0.00 ]
+Key: VMAXCPDZrmb:  [ 0.00  0.00 ]
+Key: VMAXCPDZrmbk:  [ 0.00  0.00 ]
+Key: VMAXCPDZrmbkz:  [ 0.00  0.00 ]
+Key: VMAXCPDZrmk:  [ 0.00  0.00 ]
+Key: VMAXCPDZrmkz:  [ 0.00  0.00 ]
+Key: VMAXCPDZrr:  [ 0.00  0.00 ]
+Key: VMAXCPDZrrk:  [ 0.00  0.00 ]
+Key: VMAXCPDZrrkz:  [ 0.00  0.00 ]
+Key: VMAXCPDrm:  [ 0.00  0.00 ]
+Key: VMAXCPDrr:  [ 0.00  0.00 ]
+Key: VMAXCPHZ:  [ 0.00  0.00 ]
+Key: VMAXCPHZrm:  [ 0.00  0.00 ]
+Key: VMAXCPHZrmb:  [ 0.00  0.00 ]
+Key: VMAXCPHZrmbk:  [ 0.00  0.00 ]
+Key: VMAXCPHZrmbkz:  [ 0.00  0.00 ]
+Key: VMAXCPHZrmk:  [ 0.00  0.00 ]
+Key: VMAXCPHZrmkz:  [ 0.00  0.00 ]
+Key: VMAXCPHZrr:  [ 0.00  0.00 ]
+Key: VMAXCPHZrrk:  [ 0.00  0.00 ]
+Key: VMAXCPHZrrkz:  [ 0.00  0.00 ]
+Key: VMAXCPSYrm:  [ 0.00  0.00 ]
+Key: VMAXCPSYrr:  [ 0.00  0.00 ]
+Key: VMAXCPSZ:  [ 0.00  0.00 ]
+Key: VMAXCPSZrm:  [ 0.00  0.00 ]
+Key: VMAXCPSZrmb:  [ 0.00  0.00 ]
+Key: VMAXCPSZrmbk:  [ 0.00  0.00 ]
+Key: VMAXCPSZrmbkz:  [ 0.00  0.00 ]
+Key: VMAXCPSZrmk:  [ 0.00  0.00 ]
+Key: VMAXCPSZrmkz:  [ 0.00  0.00 ]
+Key: VMAXCPSZrr:  [ 0.00  0.00 ]
+Key: VMAXCPSZrrk:  [ 0.00  0.00 ]
+Key: VMAXCPSZrrkz:  [ 0.00  0.00 ]
+Key: VMAXCPSrm:  [ 0.00  0.00 ]
+Key: VMAXCPSrr:  [ 0.00  0.00 ]
+Key: VMAXCSDZrm:  [ 0.00  0.00 ]
+Key: VMAXCSDZrr:  [ 0.00  0.00 ]
+Key: VMAXCSDrm:  [ 0.00  0.00 ]
+Key: VMAXCSDrr:  [ 0.00  0.00 ]
+Key: VMAXCSHZrm:  [ 0.00  0.00 ]
+Key: VMAXCSHZrr:  [ 0.00  0.00 ]
+Key: VMAXCSSZrm:  [ 0.00  0.00 ]
+Key: VMAXCSSZrr:  [ 0.00  0.00 ]
+Key: VMAXCSSrm:  [ 0.00  0.00 ]
+Key: VMAXCSSrr:  [ 0.00  0.00 ]
+Key: VMAXPDYrm:  [ 0.00  0.00 ]
+Key: VMAXPDYrr:  [ 0.00  0.00 ]
+Key: VMAXPDZ:  [ 0.00  0.00 ]
+Key: VMAXPDZrm:  [ 0.00  0.00 ]
+Key: VMAXPDZrmb:  [ 0.00  0.00 ]
+Key: VMAXPDZrmbk:  [ 0.00  0.00 ]
+Key: VMAXPDZrmbkz:  [ 0.00  0.00 ]
+Key: VMAXPDZrmk:  [ 0.00  0.00 ]
+Key: VMAXPDZrmkz:  [ 0.00  0.00 ]
+Key: VMAXPDZrr:  [ 0.00  0.00 ]
+Key: VMAXPDZrrb:  [ 0.00  0.00 ]
+Key: VMAXPDZrrbk:  [ 0.00  0.00 ]
+Key: VMAXPDZrrbkz:  [ 0.00  0.00 ]
+Key: VMAXPDZrrk:  [ 0.00  0.00 ]
+Key: VMAXPDZrrkz:  [ 0.00  0.00 ]
+Key: VMAXPDrm:  [ 0.00  0.00 ]
+Key: VMAXPDrr:  [ 0.00  0.00 ]
+Key: VMAXPHZ:  [ 0.00  0.00 ]
+Key: VMAXPHZrm:  [ 0.00  0.00 ]
+Key: VMAXPHZrmb:  [ 0.00  0.00 ]
+Key: VMAXPHZrmbk:  [ 0.00  0.00 ]
+Key: VMAXPHZrmbkz:  [ 0.00  0.00 ]
+Key: VMAXPHZrmk:  [ 0.00  0.00 ]
+Key: VMAXPHZrmkz:  [ 0.00  0.00 ]
+Key: VMAXPHZrr:  [ 0.00  0.00 ]
+Key: VMAXPHZrrb:  [ 0.00  0.00 ]
+Key: VMAXPHZrrbk:  [ 0.00  0.00 ]
+Key: VMAXPHZrrbkz:  [ 0.00  0.00 ]
+Key: VMAXPHZrrk:  [ 0.00  0.00 ]
+Key: VMAXPHZrrkz:  [ 0.00  0.00 ]
+Key: VMAXPSYrm:  [ 0.00  0.00 ]
+Key: VMAXPSYrr:  [ 0.00  0.00 ]
+Key: VMAXPSZ:  [ 0.00  0.00 ]
+Key: VMAXPSZrm:  [ 0.00  0.00 ]
+Key: VMAXPSZrmb:  [ 0.00  0.00 ]
+Key: VMAXPSZrmbk:  [ 0.00  0.00 ]
+Key: VMAXPSZrmbkz:  [ 0.00  0.00 ]
+Key: VMAXPSZrmk:  [ 0.00  0.00 ]
+Key: VMAXPSZrmkz:  [ 0.00  0.00 ]
+Key: VMAXPSZrr:  [ 0.00  0.00 ]
+Key: VMAXPSZrrb:  [ 0.00  0.00 ]
+Key: VMAXPSZrrbk:  [ 0.00  0.00 ]
+Key: VMAXPSZrrbkz:  [ 0.00  0.00 ]
+Key: VMAXPSZrrk:  [ 0.00  0.00 ]
+Key: VMAXPSZrrkz:  [ 0.00  0.00 ]
+Key: VMAXPSrm:  [ 0.00  0.00 ]
+Key: VMAXPSrr:  [ 0.00  0.00 ]
+Key: VMAXSDZrm:  [ 0.00  0.00 ]
+Key: VMAXSDZrm_Int:  [ 0.00  0.00 ]
+Key: VMAXSDZrmk_Int:  [ 0.00  0.00 ]
+Key: VMAXSDZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSDZrr:  [ 0.00  0.00 ]
+Key: VMAXSDZrr_Int:  [ 0.00  0.00 ]
+Key: VMAXSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VMAXSDZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMAXSDZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSDZrrk_Int:  [ 0.00  0.00 ]
+Key: VMAXSDZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSDrm:  [ 0.00  0.00 ]
+Key: VMAXSDrm_Int:  [ 0.00  0.00 ]
+Key: VMAXSDrr:  [ 0.00  0.00 ]
+Key: VMAXSDrr_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrm:  [ 0.00  0.00 ]
+Key: VMAXSHZrm_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrmk_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrr:  [ 0.00  0.00 ]
+Key: VMAXSHZrr_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrrk_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrm:  [ 0.00  0.00 ]
+Key: VMAXSSZrm_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrmk_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrr:  [ 0.00  0.00 ]
+Key: VMAXSSZrr_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrrk_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSSrm:  [ 0.00  0.00 ]
+Key: VMAXSSrm_Int:  [ 0.00  0.00 ]
+Key: VMAXSSrr:  [ 0.00  0.00 ]
+Key: VMAXSSrr_Int:  [ 0.00  0.00 ]
+Key: VMCALL:  [ 0.00  0.00 ]
+Key: VMCLEARm:  [ 0.00  0.00 ]
+Key: VMFUNC:  [ 0.00  0.00 ]
+Key: VMINBF:  [ 0.00  0.00 ]
+Key: VMINCPDYrm:  [ 0.00  0.00 ]
+Key: VMINCPDYrr:  [ 0.00  0.00 ]
+Key: VMINCPDZ:  [ 0.00  0.00 ]
+Key: VMINCPDZrm:  [ 0.00  0.00 ]
+Key: VMINCPDZrmb:  [ 0.00  0.00 ]
+Key: VMINCPDZrmbk:  [ 0.00  0.00 ]
+Key: VMINCPDZrmbkz:  [ 0.00  0.00 ]
+Key: VMINCPDZrmk:  [ 0.00  0.00 ]
+Key: VMINCPDZrmkz:  [ 0.00  0.00 ]
+Key: VMINCPDZrr:  [ 0.00  0.00 ]
+Key: VMINCPDZrrk:  [ 0.00  0.00 ]
+Key: VMINCPDZrrkz:  [ 0.00  0.00 ]
+Key: VMINCPDrm:  [ 0.00  0.00 ]
+Key: VMINCPDrr:  [ 0.00  0.00 ]
+Key: VMINCPHZ:  [ 0.00  0.00 ]
+Key: VMINCPHZrm:  [ 0.00  0.00 ]
+Key: VMINCPHZrmb:  [ 0.00  0.00 ]
+Key: VMINCPHZrmbk:  [ 0.00  0.00 ]
+Key: VMINCPHZrmbkz:  [ 0.00  0.00 ]
+Key: VMINCPHZrmk:  [ 0.00  0.00 ]
+Key: VMINCPHZrmkz:  [ 0.00  0.00 ]
+Key: VMINCPHZrr:  [ 0.00  0.00 ]
+Key: VMINCPHZrrk:  [ 0.00  0.00 ]
+Key: VMINCPHZrrkz:  [ 0.00  0.00 ]
+Key: VMINCPSYrm:  [ 0.00  0.00 ]
+Key: VMINCPSYrr:  [ 0.00  0.00 ]
+Key: VMINCPSZ:  [ 0.00  0.00 ]
+Key: VMINCPSZrm:  [ 0.00  0.00 ]
+Key: VMINCPSZrmb:  [ 0.00  0.00 ]
+Key: VMINCPSZrmbk:  [ 0.00  0.00 ]
+Key: VMINCPSZrmbkz:  [ 0.00  0.00 ]
+Key: VMINCPSZrmk:  [ 0.00  0.00 ]
+Key: VMINCPSZrmkz:  [ 0.00  0.00 ]
+Key: VMINCPSZrr:  [ 0.00  0.00 ]
+Key: VMINCPSZrrk:  [ 0.00  0.00 ]
+Key: VMINCPSZrrkz:  [ 0.00  0.00 ]
+Key: VMINCPSrm:  [ 0.00  0.00 ]
+Key: VMINCPSrr:  [ 0.00  0.00 ]
+Key: VMINCSDZrm:  [ 0.00  0.00 ]
+Key: VMINCSDZrr:  [ 0.00  0.00 ]
+Key: VMINCSDrm:  [ 0.00  0.00 ]
+Key: VMINCSDrr:  [ 0.00  0.00 ]
+Key: VMINCSHZrm:  [ 0.00  0.00 ]
+Key: VMINCSHZrr:  [ 0.00  0.00 ]
+Key: VMINCSSZrm:  [ 0.00  0.00 ]
+Key: VMINCSSZrr:  [ 0.00  0.00 ]
+Key: VMINCSSrm:  [ 0.00  0.00 ]
+Key: VMINCSSrr:  [ 0.00  0.00 ]
+Key: VMINMAXBF:  [ 0.00  0.00 ]
+Key: VMINMAXPDZ:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrmbi:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrmbik:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrmbikz:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrmi:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrmik:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrmikz:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrri:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrrib:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrribk:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrribkz:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrrik:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrrikz:  [ 0.00  0.00 ]
+Key: VMINMAXPHZ:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrmbi:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrmbik:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrmbikz:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrmi:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrmik:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrmikz:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrri:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrrib:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrribk:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrribkz:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrrik:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrrikz:  [ 0.00  0.00 ]
+Key: VMINMAXPSZ:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrmbi:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrmbik:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrmbikz:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrmi:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrmik:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrmikz:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrri:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrrib:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrribk:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrribkz:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrrik:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrrikz:  [ 0.00  0.00 ]
+Key: VMINMAXSDrmi:  [ 0.00  0.00 ]
+Key: VMINMAXSDrmi_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSDrmik_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSDrmikz_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSDrri:  [ 0.00  0.00 ]
+Key: VMINMAXSDrri_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSDrrib_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSDrribk_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSDrribkz_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSDrrik_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSDrrikz_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrmi:  [ 0.00  0.00 ]
+Key: VMINMAXSHrmi_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrmik_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrmikz_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrri:  [ 0.00  0.00 ]
+Key: VMINMAXSHrri_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrrib_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrribk_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrribkz_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrrik_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrrikz_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrmi:  [ 0.00  0.00 ]
+Key: VMINMAXSSrmi_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrmik_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrmikz_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrri:  [ 0.00  0.00 ]
+Key: VMINMAXSSrri_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrrib_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrribk_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrribkz_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrrik_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrrikz_Int:  [ 0.00  0.00 ]
+Key: VMINPDYrm:  [ 0.00  0.00 ]
+Key: VMINPDYrr:  [ 0.00  0.00 ]
+Key: VMINPDZ:  [ 0.00  0.00 ]
+Key: VMINPDZrm:  [ 0.00  0.00 ]
+Key: VMINPDZrmb:  [ 0.00  0.00 ]
+Key: VMINPDZrmbk:  [ 0.00  0.00 ]
+Key: VMINPDZrmbkz:  [ 0.00  0.00 ]
+Key: VMINPDZrmk:  [ 0.00  0.00 ]
+Key: VMINPDZrmkz:  [ 0.00  0.00 ]
+Key: VMINPDZrr:  [ 0.00  0.00 ]
+Key: VMINPDZrrb:  [ 0.00  0.00 ]
+Key: VMINPDZrrbk:  [ 0.00  0.00 ]
+Key: VMINPDZrrbkz:  [ 0.00  0.00 ]
+Key: VMINPDZrrk:  [ 0.00  0.00 ]
+Key: VMINPDZrrkz:  [ 0.00  0.00 ]
+Key: VMINPDrm:  [ 0.00  0.00 ]
+Key: VMINPDrr:  [ 0.00  0.00 ]
+Key: VMINPHZ:  [ 0.00  0.00 ]
+Key: VMINPHZrm:  [ 0.00  0.00 ]
+Key: VMINPHZrmb:  [ 0.00  0.00 ]
+Key: VMINPHZrmbk:  [ 0.00  0.00 ]
+Key: VMINPHZrmbkz:  [ 0.00  0.00 ]
+Key: VMINPHZrmk:  [ 0.00  0.00 ]
+Key: VMINPHZrmkz:  [ 0.00  0.00 ]
+Key: VMINPHZrr:  [ 0.00  0.00 ]
+Key: VMINPHZrrb:  [ 0.00  0.00 ]
+Key: VMINPHZrrbk:  [ 0.00  0.00 ]
+Key: VMINPHZrrbkz:  [ 0.00  0.00 ]
+Key: VMINPHZrrk:  [ 0.00  0.00 ]
+Key: VMINPHZrrkz:  [ 0.00  0.00 ]
+Key: VMINPSYrm:  [ 0.00  0.00 ]
+Key: VMINPSYrr:  [ 0.00  0.00 ]
+Key: VMINPSZ:  [ 0.00  0.00 ]
+Key: VMINPSZrm:  [ 0.00  0.00 ]
+Key: VMINPSZrmb:  [ 0.00  0.00 ]
+Key: VMINPSZrmbk:  [ 0.00  0.00 ]
+Key: VMINPSZrmbkz:  [ 0.00  0.00 ]
+Key: VMINPSZrmk:  [ 0.00  0.00 ]
+Key: VMINPSZrmkz:  [ 0.00  0.00 ]
+Key: VMINPSZrr:  [ 0.00  0.00 ]
+Key: VMINPSZrrb:  [ 0.00  0.00 ]
+Key: VMINPSZrrbk:  [ 0.00  0.00 ]
+Key: VMINPSZrrbkz:  [ 0.00  0.00 ]
+Key: VMINPSZrrk:  [ 0.00  0.00 ]
+Key: VMINPSZrrkz:  [ 0.00  0.00 ]
+Key: VMINPSrm:  [ 0.00  0.00 ]
+Key: VMINPSrr:  [ 0.00  0.00 ]
+Key: VMINSDZrm:  [ 0.00  0.00 ]
+Key: VMINSDZrm_Int:  [ 0.00  0.00 ]
+Key: VMINSDZrmk_Int:  [ 0.00  0.00 ]
+Key: VMINSDZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMINSDZrr:  [ 0.00  0.00 ]
+Key: VMINSDZrr_Int:  [ 0.00  0.00 ]
+Key: VMINSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VMINSDZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMINSDZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMINSDZrrk_Int:  [ 0.00  0.00 ]
+Key: VMINSDZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMINSDrm:  [ 0.00  0.00 ]
+Key: VMINSDrm_Int:  [ 0.00  0.00 ]
+Key: VMINSDrr:  [ 0.00  0.00 ]
+Key: VMINSDrr_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrm:  [ 0.00  0.00 ]
+Key: VMINSHZrm_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrmk_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrr:  [ 0.00  0.00 ]
+Key: VMINSHZrr_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrrk_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrm:  [ 0.00  0.00 ]
+Key: VMINSSZrm_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrmk_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrr:  [ 0.00  0.00 ]
+Key: VMINSSZrr_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrrk_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMINSSrm:  [ 0.00  0.00 ]
+Key: VMINSSrm_Int:  [ 0.00  0.00 ]
+Key: VMINSSrr:  [ 0.00  0.00 ]
+Key: VMINSSrr_Int:  [ 0.00  0.00 ]
+Key: VMLAUNCH:  [ 0.00  0.00 ]
+Key: VMLOAD:  [ 0.00  0.00 ]
+Key: VMMCALL:  [ 0.00  0.00 ]
+Key: VMOV:  [ 0.00  0.00 ]
+Key: VMOVAPDYmr:  [ 0.00  0.00 ]
+Key: VMOVAPDYrm:  [ 0.00  0.00 ]
+Key: VMOVAPDYrr:  [ 0.00  0.00 ]
+Key: VMOVAPDYrr_REV:  [ 0.00  0.00 ]
+Key: VMOVAPDZ:  [ 0.00  0.00 ]
+Key: VMOVAPDZmr:  [ 0.00  0.00 ]
+Key: VMOVAPDZmrk:  [ 0.00  0.00 ]
+Key: VMOVAPDZrm:  [ 0.00  0.00 ]
+Key: VMOVAPDZrmk:  [ 0.00  0.00 ]
+Key: VMOVAPDZrmkz:  [ 0.00  0.00 ]
+Key: VMOVAPDZrr:  [ 0.00  0.00 ]
+Key: VMOVAPDZrr_REV:  [ 0.00  0.00 ]
+Key: VMOVAPDZrrk:  [ 0.00  0.00 ]
+Key: VMOVAPDZrrk_REV:  [ 0.00  0.00 ]
+Key: VMOVAPDZrrkz:  [ 0.00  0.00 ]
+Key: VMOVAPDZrrkz_REV:  [ 0.00  0.00 ]
+Key: VMOVAPDmr:  [ 0.00  0.00 ]
+Key: VMOVAPDrm:  [ 0.00  0.00 ]
+Key: VMOVAPDrr:  [ 0.00  0.00 ]
+Key: VMOVAPDrr_REV:  [ 0.00  0.00 ]
+Key: VMOVAPSYmr:  [ 0.00  0.00 ]
+Key: VMOVAPSYrm:  [ 0.00  0.00 ]
+Key: VMOVAPSYrr:  [ 0.00  0.00 ]
+Key: VMOVAPSYrr_REV:  [ 0.00  0.00 ]
+Key: VMOVAPSZ:  [ 0.00  0.00 ]
+Key: VMOVAPSZmr:  [ 0.00  0.00 ]
+Key: VMOVAPSZmrk:  [ 0.00  0.00 ]
+Key: VMOVAPSZrm:  [ 0.00  0.00 ]
+Key: VMOVAPSZrmk:  [ 0.00  0.00 ]
+Key: VMOVAPSZrmkz:  [ 0.00  0.00 ]
+Key: VMOVAPSZrr:  [ 0.00  0.00 ]
+Key: VMOVAPSZrr_REV:  [ 0.00  0.00 ]
+Key: VMOVAPSZrrk:  [ 0.00  0.00 ]
+Key: VMOVAPSZrrk_REV:  [ 0.00  0.00 ]
+Key: VMOVAPSZrrkz:  [ 0.00  0.00 ]
+Key: VMOVAPSZrrkz_REV:  [ 0.00  0.00 ]
+Key: VMOVAPSmr:  [ 0.00  0.00 ]
+Key: VMOVAPSrm:  [ 0.00  0.00 ]
+Key: VMOVAPSrr:  [ 0.00  0.00 ]
+Key: VMOVAPSrr_REV:  [ 0.00  0.00 ]
+Key: VMOVDDUPYrm:  [ 0.00  0.00 ]
+Key: VMOVDDUPYrr:  [ 0.00  0.00 ]
+Key: VMOVDDUPZ:  [ 0.00  0.00 ]
+Key: VMOVDDUPZrm:  [ 0.00  0.00 ]
+Key: VMOVDDUPZrmk:  [ 0.00  0.00 ]
+Key: VMOVDDUPZrmkz:  [ 0.00  0.00 ]
+Key: VMOVDDUPZrr:  [ 0.00  0.00 ]
+Key: VMOVDDUPZrrk:  [ 0.00  0.00 ]
+Key: VMOVDDUPZrrkz:  [ 0.00  0.00 ]
+Key: VMOVDDUPrm:  [ 0.00  0.00 ]
+Key: VMOVDDUPrr:  [ 0.00  0.00 ]
+Key: VMOVDI:  [ 0.00  0.00 ]
+Key: VMOVDQA:  [ 0.00  0.00 ]
+Key: VMOVDQAYmr:  [ 0.00  0.00 ]
+Key: VMOVDQAYrm:  [ 0.00  0.00 ]
+Key: VMOVDQAYrr:  [ 0.00  0.00 ]
+Key: VMOVDQAYrr_REV:  [ 0.00  0.00 ]
+Key: VMOVDQAmr:  [ 0.00  0.00 ]
+Key: VMOVDQArm:  [ 0.00  0.00 ]
+Key: VMOVDQArr:  [ 0.00  0.00 ]
+Key: VMOVDQArr_REV:  [ 0.00  0.00 ]
+Key: VMOVDQU:  [ 0.00  0.00 ]
+Key: VMOVDQUYmr:  [ 0.00  0.00 ]
+Key: VMOVDQUYrm:  [ 0.00  0.00 ]
+Key: VMOVDQUYrr:  [ 0.00  0.00 ]
+Key: VMOVDQUYrr_REV:  [ 0.00  0.00 ]
+Key: VMOVDQUmr:  [ 0.00  0.00 ]
+Key: VMOVDQUrm:  [ 0.00  0.00 ]
+Key: VMOVDQUrr:  [ 0.00  0.00 ]
+Key: VMOVDQUrr_REV:  [ 0.00  0.00 ]
+Key: VMOVHLPSZrr:  [ 0.00  0.00 ]
+Key: VMOVHLPSrr:  [ 0.00  0.00 ]
+Key: VMOVHPDZ:  [ 0.00  0.00 ]
+Key: VMOVHPDmr:  [ 0.00  0.00 ]
+Key: VMOVHPDrm:  [ 0.00  0.00 ]
+Key: VMOVHPSZ:  [ 0.00  0.00 ]
+Key: VMOVHPSmr:  [ 0.00  0.00 ]
+Key: VMOVHPSrm:  [ 0.00  0.00 ]
+Key: VMOVLHPSZrr:  [ 0.00  0.00 ]
+Key: VMOVLHPSrr:  [ 0.00  0.00 ]
+Key: VMOVLPDZ:  [ 0.00  0.00 ]
+Key: VMOVLPDmr:  [ 0.00  0.00 ]
+Key: VMOVLPDrm:  [ 0.00  0.00 ]
+Key: VMOVLPSZ:  [ 0.00  0.00 ]
+Key: VMOVLPSmr:  [ 0.00  0.00 ]
+Key: VMOVLPSrm:  [ 0.00  0.00 ]
+Key: VMOVMSKPDYrr:  [ 0.00  0.00 ]
+Key: VMOVMSKPDrr:  [ 0.00  0.00 ]
+Key: VMOVMSKPSYrr:  [ 0.00  0.00 ]
+Key: VMOVMSKPSrr:  [ 0.00  0.00 ]
+Key: VMOVNTDQAYrm:  [ 0.00  0.00 ]
+Key: VMOVNTDQAZ:  [ 0.00  0.00 ]
+Key: VMOVNTDQAZrm:  [ 0.00  0.00 ]
+Key: VMOVNTDQArm:  [ 0.00  0.00 ]
+Key: VMOVNTDQYmr:  [ 0.00  0.00 ]
+Key: VMOVNTDQZ:  [ 0.00  0.00 ]
+Key: VMOVNTDQZmr:  [ 0.00  0.00 ]
+Key: VMOVNTDQmr:  [ 0.00  0.00 ]
+Key: VMOVNTPDYmr:  [ 0.00  0.00 ]
+Key: VMOVNTPDZ:  [ 0.00  0.00 ]
+Key: VMOVNTPDZmr:  [ 0.00  0.00 ]
+Key: VMOVNTPDmr:  [ 0.00  0.00 ]
+Key: VMOVNTPSYmr:  [ 0.00  0.00 ]
+Key: VMOVNTPSZ:  [ 0.00  0.00 ]
+Key: VMOVNTPSZmr:  [ 0.00  0.00 ]
+Key: VMOVNTPSmr:  [ 0.00  0.00 ]
+Key: VMOVPDI:  [ 0.00  0.00 ]
+Key: VMOVPQI:  [ 0.00  0.00 ]
+Key: VMOVPQIto:  [ 0.00  0.00 ]
+Key: VMOVQI:  [ 0.00  0.00 ]
+Key: VMOVRSBZ:  [ 0.00  0.00 ]
+Key: VMOVRSBZm:  [ 0.00  0.00 ]
+Key: VMOVRSBZmk:  [ 0.00  0.00 ]
+Key: VMOVRSBZmkz:  [ 0.00  0.00 ]
+Key: VMOVRSDZ:  [ 0.00  0.00 ]
+Key: VMOVRSDZm:  [ 0.00  0.00 ]
+Key: VMOVRSDZmk:  [ 0.00  0.00 ]
+Key: VMOVRSDZmkz:  [ 0.00  0.00 ]
+Key: VMOVRSQZ:  [ 0.00  0.00 ]
+Key: VMOVRSQZm:  [ 0.00  0.00 ]
+Key: VMOVRSQZmk:  [ 0.00  0.00 ]
+Key: VMOVRSQZmkz:  [ 0.00  0.00 ]
+Key: VMOVRSWZ:  [ 0.00  0.00 ]
+Key: VMOVRSWZm:  [ 0.00  0.00 ]
+Key: VMOVRSWZmk:  [ 0.00  0.00 ]
+Key: VMOVRSWZmkz:  [ 0.00  0.00 ]
+Key: VMOVSDZmr:  [ 0.00  0.00 ]
+Key: VMOVSDZmrk:  [ 0.00  0.00 ]
+Key: VMOVSDZrm:  [ 0.00  0.00 ]
+Key: VMOVSDZrm_alt:  [ 0.00  0.00 ]
+Key: VMOVSDZrmk:  [ 0.00  0.00 ]
+Key: VMOVSDZrmkz:  [ 0.00  0.00 ]
+Key: VMOVSDZrr:  [ 0.00  0.00 ]
+Key: VMOVSDZrr_REV:  [ 0.00  0.00 ]
+Key: VMOVSDZrrk:  [ 0.00  0.00 ]
+Key: VMOVSDZrrk_REV:  [ 0.00  0.00 ]
+Key: VMOVSDZrrkz:  [ 0.00  0.00 ]
+Key: VMOVSDZrrkz_REV:  [ 0.00  0.00 ]
+Key: VMOVSDmr:  [ 0.00  0.00 ]
+Key: VMOVSDrm:  [ 0.00  0.00 ]
+Key: VMOVSDrm_alt:  [ 0.00  0.00 ]
+Key: VMOVSDrr:  [ 0.00  0.00 ]
+Key: VMOVSDrr_REV:  [ 0.00  0.00 ]
+Key: VMOVSDto:  [ 0.00  0.00 ]
+Key: VMOVSH:  [ 0.00  0.00 ]
+Key: VMOVSHDUPYrm:  [ 0.00  0.00 ]
+Key: VMOVSHDUPYrr:  [ 0.00  0.00 ]
+Key: VMOVSHDUPZ:  [ 0.00  0.00 ]
+Key: VMOVSHDUPZrm:  [ 0.00  0.00 ]
+Key: VMOVSHDUPZrmk:  [ 0.00  0.00 ]
+Key: VMOVSHDUPZrmkz:  [ 0.00  0.00 ]
+Key: VMOVSHDUPZrr:  [ 0.00  0.00 ]
+Key: VMOVSHDUPZrrk:  [ 0.00  0.00 ]
+Key: VMOVSHDUPZrrkz:  [ 0.00  0.00 ]
+Key: VMOVSHDUPrm:  [ 0.00  0.00 ]
+Key: VMOVSHDUPrr:  [ 0.00  0.00 ]
+Key: VMOVSHZmr:  [ 0.00  0.00 ]
+Key: VMOVSHZmrk:  [ 0.00  0.00 ]
+Key: VMOVSHZrm:  [ 0.00  0.00 ]
+Key: VMOVSHZrm_alt:  [ 0.00  0.00 ]
+Key: VMOVSHZrmk:  [ 0.00  0.00 ]
+Key: VMOVSHZrmkz:  [ 0.00  0.00 ]
+Key: VMOVSHZrr:  [ 0.00  0.00 ]
+Key: VMOVSHZrr_REV:  [ 0.00  0.00 ]
+Key: VMOVSHZrrk:  [ 0.00  0.00 ]
+Key: VMOVSHZrrk_REV:  [ 0.00  0.00 ]
+Key: VMOVSHZrrkz:  [ 0.00  0.00 ]
+Key: VMOVSHZrrkz_REV:  [ 0.00  0.00 ]
+Key: VMOVSHtoW:  [ 0.00  0.00 ]
+Key: VMOVSLDUPYrm:  [ 0.00  0.00 ]
+Key: VMOVSLDUPYrr:  [ 0.00  0.00 ]
+Key: VMOVSLDUPZ:  [ 0.00  0.00 ]
+Key: VMOVSLDUPZrm:  [ 0.00  0.00 ]
+Key: VMOVSLDUPZrmk:  [ 0.00  0.00 ]
+Key: VMOVSLDUPZrmkz:  [ 0.00  0.00 ]
+Key: VMOVSLDUPZrr:  [ 0.00  0.00 ]
+Key: VMOVSLDUPZrrk:  [ 0.00  0.00 ]
+Key: VMOVSLDUPZrrkz:  [ 0.00  0.00 ]
+Key: VMOVSLDUPrm:  [ 0.00  0.00 ]
+Key: VMOVSLDUPrr:  [ 0.00  0.00 ]
+Key: VMOVSS:  [ 0.00  0.00 ]
+Key: VMOVSSZmr:  [ 0.00  0.00 ]
+Key: VMOVSSZmrk:  [ 0.00  0.00 ]
+Key: VMOVSSZrm:  [ 0.00  0.00 ]
+Key: VMOVSSZrm_alt:  [ 0.00  0.00 ]
+Key: VMOVSSZrmk:  [ 0.00  0.00 ]
+Key: VMOVSSZrmkz:  [ 0.00  0.00 ]
+Key: VMOVSSZrr:  [ 0.00  0.00 ]
+Key: VMOVSSZrr_REV:  [ 0.00  0.00 ]
+Key: VMOVSSZrrk:  [ 0.00  0.00 ]
+Key: VMOVSSZrrk_REV:  [ 0.00  0.00 ]
+Key: VMOVSSZrrkz:  [ 0.00  0.00 ]
+Key: VMOVSSZrrkz_REV:  [ 0.00  0.00 ]
+Key: VMOVSSmr:  [ 0.00  0.00 ]
+Key: VMOVSSrm:  [ 0.00  0.00 ]
+Key: VMOVSSrm_alt:  [ 0.00  0.00 ]
+Key: VMOVSSrr:  [ 0.00  0.00 ]
+Key: VMOVSSrr_REV:  [ 0.00  0.00 ]
+Key: VMOVUPDYmr:  [ 0.00  0.00 ]
+Key: VMOVUPDYrm:  [ 0.00  0.00 ]
+Key: VMOVUPDYrr:  [ 0.00  0.00 ]
+Key: VMOVUPDYrr_REV:  [ 0.00  0.00 ]
+Key: VMOVUPDZ:  [ 0.00  0.00 ]
+Key: VMOVUPDZmr:  [ 0.00  0.00 ]
+Key: VMOVUPDZmrk:  [ 0.00  0.00 ]
+Key: VMOVUPDZrm:  [ 0.00  0.00 ]
+Key: VMOVUPDZrmk:  [ 0.00  0.00 ]
+Key: VMOVUPDZrmkz:  [ 0.00  0.00 ]
+Key: VMOVUPDZrr:  [ 0.00  0.00 ]
+Key: VMOVUPDZrr_REV:  [ 0.00  0.00 ]
+Key: VMOVUPDZrrk:  [ 0.00  0.00 ]
+Key: VMOVUPDZrrk_REV:  [ 0.00  0.00 ]
+Key: VMOVUPDZrrkz:  [ 0.00  0.00 ]
+Key: VMOVUPDZrrkz_REV:  [ 0.00  0.00 ]
+Key: VMOVUPDmr:  [ 0.00  0.00 ]
+Key: VMOVUPDrm:  [ 0.00  0.00 ]
+Key: VMOVUPDrr:  [ 0.00  0.00 ]
+Key: VMOVUPDrr_REV:  [ 0.00  0.00 ]
+Key: VMOVUPSYmr:  [ 0.00  0.00 ]
+Key: VMOVUPSYrm:  [ 0.00  0.00 ]
+Key: VMOVUPSYrr:  [ 0.00  0.00 ]
+Key: VMOVUPSYrr_REV:  [ 0.00  0.00 ]
+Key: VMOVUPSZ:  [ 0.00  0.00 ]
+Key: VMOVUPSZmr:  [ 0.00  0.00 ]
+Key: VMOVUPSZmrk:  [ 0.00  0.00 ]
+Key: VMOVUPSZrm:  [ 0.00  0.00 ]
+Key: VMOVUPSZrmk:  [ 0.00  0.00 ]
+Key: VMOVUPSZrmkz:  [ 0.00  0.00 ]
+Key: VMOVUPSZrr:  [ 0.00  0.00 ]
+Key: VMOVUPSZrr_REV:  [ 0.00  0.00 ]
+Key: VMOVUPSZrrk:  [ 0.00  0.00 ]
+Key: VMOVUPSZrrk_REV:  [ 0.00  0.00 ]
+Key: VMOVUPSZrrkz:  [ 0.00  0.00 ]
+Key: VMOVUPSZrrkz_REV:  [ 0.00  0.00 ]
+Key: VMOVUPSmr:  [ 0.00  0.00 ]
+Key: VMOVUPSrm:  [ 0.00  0.00 ]
+Key: VMOVUPSrr:  [ 0.00  0.00 ]
+Key: VMOVUPSrr_REV:  [ 0.00  0.00 ]
+Key: VMOVW:  [ 0.00  0.00 ]
+Key: VMOVWmr:  [ 0.00  0.00 ]
+Key: VMOVWrm:  [ 0.00  0.00 ]
+Key: VMOVZPDILo:  [ 0.00  0.00 ]
+Key: VMOVZPQILo:  [ 0.00  0.00 ]
+Key: VMOVZPWILo:  [ 0.00  0.00 ]
+Key: VMPSADBWYrmi:  [ 0.00  0.00 ]
+Key: VMPSADBWYrri:  [ 0.00  0.00 ]
+Key: VMPSADBWZ:  [ 0.00  0.00 ]
+Key: VMPSADBWZrmi:  [ 0.00  0.00 ]
+Key: VMPSADBWZrmik:  [ 0.00  0.00 ]
+Key: VMPSADBWZrmikz:  [ 0.00  0.00 ]
+Key: VMPSADBWZrri:  [ 0.00  0.00 ]
+Key: VMPSADBWZrrik:  [ 0.00  0.00 ]
+Key: VMPSADBWZrrikz:  [ 0.00  0.00 ]
+Key: VMPSADBWrmi:  [ 0.00  0.00 ]
+Key: VMPSADBWrri:  [ 0.00  0.00 ]
+Key: VMPTRLDm:  [ 0.00  0.00 ]
+Key: VMPTRSTm:  [ 0.00  0.00 ]
+Key: VMREAD:  [ 0.00  0.00 ]
+Key: VMRESUME:  [ 0.00  0.00 ]
+Key: VMRUN:  [ 0.00  0.00 ]
+Key: VMSAVE:  [ 0.00  0.00 ]
+Key: VMULBF:  [ 0.00  0.00 ]
+Key: VMULPDYrm:  [ 0.00  0.00 ]
+Key: VMULPDYrr:  [ 0.00  0.00 ]
+Key: VMULPDZ:  [ 0.00  0.00 ]
+Key: VMULPDZrm:  [ 0.00  0.00 ]
+Key: VMULPDZrmb:  [ 0.00  0.00 ]
+Key: VMULPDZrmbk:  [ 0.00  0.00 ]
+Key: VMULPDZrmbkz:  [ 0.00  0.00 ]
+Key: VMULPDZrmk:  [ 0.00  0.00 ]
+Key: VMULPDZrmkz:  [ 0.00  0.00 ]
+Key: VMULPDZrr:  [ 0.00  0.00 ]
+Key: VMULPDZrrb:  [ 0.00  0.00 ]
+Key: VMULPDZrrbk:  [ 0.00  0.00 ]
+Key: VMULPDZrrbkz:  [ 0.00  0.00 ]
+Key: VMULPDZrrk:  [ 0.00  0.00 ]
+Key: VMULPDZrrkz:  [ 0.00  0.00 ]
+Key: VMULPDrm:  [ 0.00  0.00 ]
+Key: VMULPDrr:  [ 0.00  0.00 ]
+Key: VMULPHZ:  [ 0.00  0.00 ]
+Key: VMULPHZrm:  [ 0.00  0.00 ]
+Key: VMULPHZrmb:  [ 0.00  0.00 ]
+Key: VMULPHZrmbk:  [ 0.00  0.00 ]
+Key: VMULPHZrmbkz:  [ 0.00  0.00 ]
+Key: VMULPHZrmk:  [ 0.00  0.00 ]
+Key: VMULPHZrmkz:  [ 0.00  0.00 ]
+Key: VMULPHZrr:  [ 0.00  0.00 ]
+Key: VMULPHZrrb:  [ 0.00  0.00 ]
+Key: VMULPHZrrbk:  [ 0.00  0.00 ]
+Key: VMULPHZrrbkz:  [ 0.00  0.00 ]
+Key: VMULPHZrrk:  [ 0.00  0.00 ]
+Key: VMULPHZrrkz:  [ 0.00  0.00 ]
+Key: VMULPSYrm:  [ 0.00  0.00 ]
+Key: VMULPSYrr:  [ 0.00  0.00 ]
+Key: VMULPSZ:  [ 0.00  0.00 ]
+Key: VMULPSZrm:  [ 0.00  0.00 ]
+Key: VMULPSZrmb:  [ 0.00  0.00 ]
+Key: VMULPSZrmbk:  [ 0.00  0.00 ]
+Key: VMULPSZrmbkz:  [ 0.00  0.00 ]
+Key: VMULPSZrmk:  [ 0.00  0.00 ]
+Key: VMULPSZrmkz:  [ 0.00  0.00 ]
+Key: VMULPSZrr:  [ 0.00  0.00 ]
+Key: VMULPSZrrb:  [ 0.00  0.00 ]
+Key: VMULPSZrrbk:  [ 0.00  0.00 ]
+Key: VMULPSZrrbkz:  [ 0.00  0.00 ]
+Key: VMULPSZrrk:  [ 0.00  0.00 ]
+Key: VMULPSZrrkz:  [ 0.00  0.00 ]
+Key: VMULPSrm:  [ 0.00  0.00 ]
+Key: VMULPSrr:  [ 0.00  0.00 ]
+Key: VMULSDZrm:  [ 0.00  0.00 ]
+Key: VMULSDZrm_Int:  [ 0.00  0.00 ]
+Key: VMULSDZrmk_Int:  [ 0.00  0.00 ]
+Key: VMULSDZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMULSDZrr:  [ 0.00  0.00 ]
+Key: VMULSDZrr_Int:  [ 0.00  0.00 ]
+Key: VMULSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VMULSDZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMULSDZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMULSDZrrk_Int:  [ 0.00  0.00 ]
+Key: VMULSDZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMULSDrm:  [ 0.00  0.00 ]
+Key: VMULSDrm_Int:  [ 0.00  0.00 ]
+Key: VMULSDrr:  [ 0.00  0.00 ]
+Key: VMULSDrr_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrm:  [ 0.00  0.00 ]
+Key: VMULSHZrm_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrmk_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrr:  [ 0.00  0.00 ]
+Key: VMULSHZrr_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrrk_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrm:  [ 0.00  0.00 ]
+Key: VMULSSZrm_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrmk_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrr:  [ 0.00  0.00 ]
+Key: VMULSSZrr_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrrk_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMULSSrm:  [ 0.00  0.00 ]
+Key: VMULSSrm_Int:  [ 0.00  0.00 ]
+Key: VMULSSrr:  [ 0.00  0.00 ]
+Key: VMULSSrr_Int:  [ 0.00  0.00 ]
+Key: VMWRITE:  [ 0.00  0.00 ]
+Key: VMXOFF:  [ 0.00  0.00 ]
+Key: VMXON:  [ 0.00  0.00 ]
+Key: VORPDYrm:  [ 0.00  0.00 ]
+Key: VORPDYrr:  [ 0.00  0.00 ]
+Key: VORPDZ:  [ 0.00  0.00 ]
+Key: VORPDZrm:  [ 0.00  0.00 ]
+Key: VORPDZrmb:  [ 0.00  0.00 ]
+Key: VORPDZrmbk:  [ 0.00  0.00 ]
+Key: VORPDZrmbkz:  [ 0.00  0.00 ]
+Key: VORPDZrmk:  [ 0.00  0.00 ]
+Key: VORPDZrmkz:  [ 0.00  0.00 ]
+Key: VORPDZrr:  [ 0.00  0.00 ]
+Key: VORPDZrrk:  [ 0.00  0.00 ]
+Key: VORPDZrrkz:  [ 0.00  0.00 ]
+Key: VORPDrm:  [ 0.00  0.00 ]
+Key: VORPDrr:  [ 0.00  0.00 ]
+Key: VORPSYrm:  [ 0.00  0.00 ]
+Key: VORPSYrr:  [ 0.00  0.00 ]
+Key: VORPSZ:  [ 0.00  0.00 ]
+Key: VORPSZrm:  [ 0.00  0.00 ]
+Key: VORPSZrmb:  [ 0.00  0.00 ]
+Key: VORPSZrmbk:  [ 0.00  0.00 ]
+Key: VORPSZrmbkz:  [ 0.00  0.00 ]
+Key: VORPSZrmk:  [ 0.00  0.00 ]
+Key: VORPSZrmkz:  [ 0.00  0.00 ]
+Key: VORPSZrr:  [ 0.00  0.00 ]
+Key: VORPSZrrk:  [ 0.00  0.00 ]
+Key: VORPSZrrkz:  [ 0.00  0.00 ]
+Key: VORPSrm:  [ 0.00  0.00 ]
+Key: VORPSrr:  [ 0.00  0.00 ]
+Key: VP:  [ 0.00  0.00 ]
+Key: VPABSBYrm:  [ 0.00  0.00 ]
+Key: VPABSBYrr:  [ 0.00  0.00 ]
+Key: VPABSBZ:  [ 0.00  0.00 ]
+Key: VPABSBZrm:  [ 0.00  0.00 ]
+Key: VPABSBZrmk:  [ 0.00  0.00 ]
+Key: VPABSBZrmkz:  [ 0.00  0.00 ]
+Key: VPABSBZrr:  [ 0.00  0.00 ]
+Key: VPABSBZrrk:  [ 0.00  0.00 ]
+Key: VPABSBZrrkz:  [ 0.00  0.00 ]
+Key: VPABSBrm:  [ 0.00  0.00 ]
+Key: VPABSBrr:  [ 0.00  0.00 ]
+Key: VPABSDYrm:  [ 0.00  0.00 ]
+Key: VPABSDYrr:  [ 0.00  0.00 ]
+Key: VPABSDZ:  [ 0.00  0.00 ]
+Key: VPABSDZrm:  [ 0.00  0.00 ]
+Key: VPABSDZrmb:  [ 0.00  0.00 ]
+Key: VPABSDZrmbk:  [ 0.00  0.00 ]
+Key: VPABSDZrmbkz:  [ 0.00  0.00 ]
+Key: VPABSDZrmk:  [ 0.00  0.00 ]
+Key: VPABSDZrmkz:  [ 0.00  0.00 ]
+Key: VPABSDZrr:  [ 0.00  0.00 ]
+Key: VPABSDZrrk:  [ 0.00  0.00 ]
+Key: VPABSDZrrkz:  [ 0.00  0.00 ]
+Key: VPABSDrm:  [ 0.00  0.00 ]
+Key: VPABSDrr:  [ 0.00  0.00 ]
+Key: VPABSQZ:  [ 0.00  0.00 ]
+Key: VPABSQZrm:  [ 0.00  0.00 ]
+Key: VPABSQZrmb:  [ 0.00  0.00 ]
+Key: VPABSQZrmbk:  [ 0.00  0.00 ]
+Key: VPABSQZrmbkz:  [ 0.00  0.00 ]
+Key: VPABSQZrmk:  [ 0.00  0.00 ]
+Key: VPABSQZrmkz:  [ 0.00  0.00 ]
+Key: VPABSQZrr:  [ 0.00  0.00 ]
+Key: VPABSQZrrk:  [ 0.00  0.00 ]
+Key: VPABSQZrrkz:  [ 0.00  0.00 ]
+Key: VPABSWYrm:  [ 0.00  0.00 ]
+Key: VPABSWYrr:  [ 0.00  0.00 ]
+Key: VPABSWZ:  [ 0.00  0.00 ]
+Key: VPABSWZrm:  [ 0.00  0.00 ]
+Key: VPABSWZrmk:  [ 0.00  0.00 ]
+Key: VPABSWZrmkz:  [ 0.00  0.00 ]
+Key: VPABSWZrr:  [ 0.00  0.00 ]
+Key: VPABSWZrrk:  [ 0.00  0.00 ]
+Key: VPABSWZrrkz:  [ 0.00  0.00 ]
+Key: VPABSWrm:  [ 0.00  0.00 ]
+Key: VPABSWrr:  [ 0.00  0.00 ]
+Key: VPACKSSDWYrm:  [ 0.00  0.00 ]
+Key: VPACKSSDWYrr:  [ 0.00  0.00 ]
+Key: VPACKSSDWZ:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrm:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrmb:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrmbk:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrmbkz:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrmk:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrmkz:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrr:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrrk:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrrkz:  [ 0.00  0.00 ]
+Key: VPACKSSDWrm:  [ 0.00  0.00 ]
+Key: VPACKSSDWrr:  [ 0.00  0.00 ]
+Key: VPACKSSWBYrm:  [ 0.00  0.00 ]
+Key: VPACKSSWBYrr:  [ 0.00  0.00 ]
+Key: VPACKSSWBZ:  [ 0.00  0.00 ]
+Key: VPACKSSWBZrm:  [ 0.00  0.00 ]
+Key: VPACKSSWBZrmk:  [ 0.00  0.00 ]
+Key: VPACKSSWBZrmkz:  [ 0.00  0.00 ]
+Key: VPACKSSWBZrr:  [ 0.00  0.00 ]
+Key: VPACKSSWBZrrk:  [ 0.00  0.00 ]
+Key: VPACKSSWBZrrkz:  [ 0.00  0.00 ]
+Key: VPACKSSWBrm:  [ 0.00  0.00 ]
+Key: VPACKSSWBrr:  [ 0.00  0.00 ]
+Key: VPACKUSDWYrm:  [ 0.00  0.00 ]
+Key: VPACKUSDWYrr:  [ 0.00  0.00 ]
+Key: VPACKUSDWZ:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrm:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrmb:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrmbk:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrmbkz:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrmk:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrmkz:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrr:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrrk:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrrkz:  [ 0.00  0.00 ]
+Key: VPACKUSDWrm:  [ 0.00  0.00 ]
+Key: VPACKUSDWrr:  [ 0.00  0.00 ]
+Key: VPACKUSWBYrm:  [ 0.00  0.00 ]
+Key: VPACKUSWBYrr:  [ 0.00  0.00 ]
+Key: VPACKUSWBZ:  [ 0.00  0.00 ]
+Key: VPACKUSWBZrm:  [ 0.00  0.00 ]
+Key: VPACKUSWBZrmk:  [ 0.00  0.00 ]
+Key: VPACKUSWBZrmkz:  [ 0.00  0.00 ]
+Key: VPACKUSWBZrr:  [ 0.00  0.00 ]
+Key: VPACKUSWBZrrk:  [ 0.00  0.00 ]
+Key: VPACKUSWBZrrkz:  [ 0.00  0.00 ]
+Key: VPACKUSWBrm:  [ 0.00  0.00 ]
+Key: VPACKUSWBrr:  [ 0.00  0.00 ]
+Key: VPADDBYrm:  [ 0.00  0.00 ]
+Key: VPADDBYrr:  [ 0.00  0.00 ]
+Key: VPADDBZ:  [ 0.00  0.00 ]
+Key: VPADDBZrm:  [ 0.00  0.00 ]
+Key: VPADDBZrmk:  [ 0.00  0.00 ]
+Key: VPADDBZrmkz:  [ 0.00  0.00 ]
+Key: VPADDBZrr:  [ 0.00  0.00 ]
+Key: VPADDBZrrk:  [ 0.00  0.00 ]
+Key: VPADDBZrrkz:  [ 0.00  0.00 ]
+Key: VPADDBrm:  [ 0.00  0.00 ]
+Key: VPADDBrr:  [ 0.00  0.00 ]
+Key: VPADDDYrm:  [ 0.00  0.00 ]
+Key: VPADDDYrr:  [ 0.00  0.00 ]
+Key: VPADDDZ:  [ 0.00  0.00 ]
+Key: VPADDDZrm:  [ 0.00  0.00 ]
+Key: VPADDDZrmb:  [ 0.00  0.00 ]
+Key: VPADDDZrmbk:  [ 0.00  0.00 ]
+Key: VPADDDZrmbkz:  [ 0.00  0.00 ]
+Key: VPADDDZrmk:  [ 0.00  0.00 ]
+Key: VPADDDZrmkz:  [ 0.00  0.00 ]
+Key: VPADDDZrr:  [ 0.00  0.00 ]
+Key: VPADDDZrrk:  [ 0.00  0.00 ]
+Key: VPADDDZrrkz:  [ 0.00  0.00 ]
+Key: VPADDDrm:  [ 0.00  0.00 ]
+Key: VPADDDrr:  [ 0.00  0.00 ]
+Key: VPADDQYrm:  [ 0.00  0.00 ]
+Key: VPADDQYrr:  [ 0.00  0.00 ]
+Key: VPADDQZ:  [ 0.00  0.00 ]
+Key: VPADDQZrm:  [ 0.00  0.00 ]
+Key: VPADDQZrmb:  [ 0.00  0.00 ]
+Key: VPADDQZrmbk:  [ 0.00  0.00 ]
+Key: VPADDQZrmbkz:  [ 0.00  0.00 ]
+Key: VPADDQZrmk:  [ 0.00  0.00 ]
+Key: VPADDQZrmkz:  [ 0.00  0.00 ]
+Key: VPADDQZrr:  [ 0.00  0.00 ]
+Key: VPADDQZrrk:  [ 0.00  0.00 ]
+Key: VPADDQZrrkz:  [ 0.00  0.00 ]
+Key: VPADDQrm:  [ 0.00  0.00 ]
+Key: VPADDQrr:  [ 0.00  0.00 ]
+Key: VPADDSBYrm:  [ 0.00  0.00 ]
+Key: VPADDSBYrr:  [ 0.00  0.00 ]
+Key: VPADDSBZ:  [ 0.00  0.00 ]
+Key: VPADDSBZrm:  [ 0.00  0.00 ]
+Key: VPADDSBZrmk:  [ 0.00  0.00 ]
+Key: VPADDSBZrmkz:  [ 0.00  0.00 ]
+Key: VPADDSBZrr:  [ 0.00  0.00 ]
+Key: VPADDSBZrrk:  [ 0.00  0.00 ]
+Key: VPADDSBZrrkz:  [ 0.00  0.00 ]
+Key: VPADDSBrm:  [ 0.00  0.00 ]
+Key: VPADDSBrr:  [ 0.00  0.00 ]
+Key: VPADDSWYrm:  [ 0.00  0.00 ]
+Key: VPADDSWYrr:  [ 0.00  0.00 ]
+Key: VPADDSWZ:  [ 0.00  0.00 ]
+Key: VPADDSWZrm:  [ 0.00  0.00 ]
+Key: VPADDSWZrmk:  [ 0.00  0.00 ]
+Key: VPADDSWZrmkz:  [ 0.00  0.00 ]
+Key: VPADDSWZrr:  [ 0.00  0.00 ]
+Key: VPADDSWZrrk:  [ 0.00  0.00 ]
+Key: VPADDSWZrrkz:  [ 0.00  0.00 ]
+Key: VPADDSWrm:  [ 0.00  0.00 ]
+Key: VPADDSWrr:  [ 0.00  0.00 ]
+Key: VPADDUSBYrm:  [ 0.00  0.00 ]
+Key: VPADDUSBYrr:  [ 0.00  0.00 ]
+Key: VPADDUSBZ:  [ 0.00  0.00 ]
+Key: VPADDUSBZrm:  [ 0.00  0.00 ]
+Key: VPADDUSBZrmk:  [ 0.00  0.00 ]
+Key: VPADDUSBZrmkz:  [ 0.00  0.00 ]
+Key: VPADDUSBZrr:  [ 0.00  0.00 ]
+Key: VPADDUSBZrrk:  [ 0.00  0.00 ]
+Key: VPADDUSBZrrkz:  [ 0.00  0.00 ]
+Key: VPADDUSBrm:  [ 0.00  0.00 ]
+Key: VPADDUSBrr:  [ 0.00  0.00 ]
+Key: VPADDUSWYrm:  [ 0.00  0.00 ]
+Key: VPADDUSWYrr:  [ 0.00  0.00 ]
+Key: VPADDUSWZ:  [ 0.00  0.00 ]
+Key: VPADDUSWZrm:  [ 0.00  0.00 ]
+Key: VPADDUSWZrmk:  [ 0.00  0.00 ]
+Key: VPADDUSWZrmkz:  [ 0.00  0.00 ]
+Key: VPADDUSWZrr:  [ 0.00  0.00 ]
+Key: VPADDUSWZrrk:  [ 0.00  0.00 ]
+Key: VPADDUSWZrrkz:  [ 0.00  0.00 ]
+Key: VPADDUSWrm:  [ 0.00  0.00 ]
+Key: VPADDUSWrr:  [ 0.00  0.00 ]
+Key: VPADDWYrm:  [ 0.00  0.00 ]
+Key: VPADDWYrr:  [ 0.00  0.00 ]
+Key: VPADDWZ:  [ 0.00  0.00 ]
+Key: VPADDWZrm:  [ 0.00  0.00 ]
+Key: VPADDWZrmk:  [ 0.00  0.00 ]
+Key: VPADDWZrmkz:  [ 0.00  0.00 ]
+Key: VPADDWZrr:  [ 0.00  0.00 ]
+Key: VPADDWZrrk:  [ 0.00  0.00 ]
+Key: VPADDWZrrkz:  [ 0.00  0.00 ]
+Key: VPADDWrm:  [ 0.00  0.00 ]
+Key: VPADDWrr:  [ 0.00  0.00 ]
+Key: VPALIGNRYrmi:  [ 0.00  0.00 ]
+Key: VPALIGNRYrri:  [ 0.00  0.00 ]
+Key: VPALIGNRZ:  [ 0.00  0.00 ]
+Key: VPALIGNRZrmi:  [ 0.00  0.00 ]
+Key: VPALIGNRZrmik:  [ 0.00  0.00 ]
+Key: VPALIGNRZrmikz:  [ 0.00  0.00 ]
+Key: VPALIGNRZrri:  [ 0.00  0.00 ]
+Key: VPALIGNRZrrik:  [ 0.00  0.00 ]
+Key: VPALIGNRZrrikz:  [ 0.00  0.00 ]
+Key: VPALIGNRrmi:  [ 0.00  0.00 ]
+Key: VPALIGNRrri:  [ 0.00  0.00 ]
+Key: VPANDDZ:  [ 0.00  0.00 ]
+Key: VPANDDZrm:  [ 0.00  0.00 ]
+Key: VPANDDZrmb:  [ 0.00  0.00 ]
+Key: VPANDDZrmbk:  [ 0.00  0.00 ]
+Key: VPANDDZrmbkz:  [ 0.00  0.00 ]
+Key: VPANDDZrmk:  [ 0.00  0.00 ]
+Key: VPANDDZrmkz:  [ 0.00  0.00 ]
+Key: VPANDDZrr:  [ 0.00  0.00 ]
+Key: VPANDDZrrk:  [ 0.00  0.00 ]
+Key: VPANDDZrrkz:  [ 0.00  0.00 ]
+Key: VPANDNDZ:  [ 0.00  0.00 ]
+Key: VPANDNDZrm:  [ 0.00  0.00 ]
+Key: VPANDNDZrmb:  [ 0.00  0.00 ]
+Key: VPANDNDZrmbk:  [ 0.00  0.00 ]
+Key: VPANDNDZrmbkz:  [ 0.00  0.00 ]
+Key: VPANDNDZrmk:  [ 0.00  0.00 ]
+Key: VPANDNDZrmkz:  [ 0.00  0.00 ]
+Key: VPANDNDZrr:  [ 0.00  0.00 ]
+Key: VPANDNDZrrk:  [ 0.00  0.00 ]
+Key: VPANDNDZrrkz:  [ 0.00  0.00 ]
+Key: VPANDNQZ:  [ 0.00  0.00 ]
+Key: VPANDNQZrm:  [ 0.00  0.00 ]
+Key: VPANDNQZrmb:  [ 0.00  0.00 ]
+Key: VPANDNQZrmbk:  [ 0.00  0.00 ]
+Key: VPANDNQZrmbkz:  [ 0.00  0.00 ]
+Key: VPANDNQZrmk:  [ 0.00  0.00 ]
+Key: VPANDNQZrmkz:  [ 0.00  0.00 ]
+Key: VPANDNQZrr:  [ 0.00  0.00 ]
+Key: VPANDNQZrrk:  [ 0.00  0.00 ]
+Key: VPANDNQZrrkz:  [ 0.00  0.00 ]
+Key: VPANDNYrm:  [ 0.00  0.00 ]
+Key: VPANDNYrr:  [ 0.00  0.00 ]
+Key: VPANDNrm:  [ 0.00  0.00 ]
+Key: VPANDNrr:  [ 0.00  0.00 ]
+Key: VPANDQZ:  [ 0.00  0.00 ]
+Key: VPANDQZrm:  [ 0.00  0.00 ]
+Key: VPANDQZrmb:  [ 0.00  0.00 ]
+Key: VPANDQZrmbk:  [ 0.00  0.00 ]
+Key: VPANDQZrmbkz:  [ 0.00  0.00 ]
+Key: VPANDQZrmk:  [ 0.00  0.00 ]
+Key: VPANDQZrmkz:  [ 0.00  0.00 ]
+Key: VPANDQZrr:  [ 0.00  0.00 ]
+Key: VPANDQZrrk:  [ 0.00  0.00 ]
+Key: VPANDQZrrkz:  [ 0.00  0.00 ]
+Key: VPANDYrm:  [ 0.00  0.00 ]
+Key: VPANDYrr:  [ 0.00  0.00 ]
+Key: VPANDrm:  [ 0.00  0.00 ]
+Key: VPANDrr:  [ 0.00  0.00 ]
+Key: VPAVGBYrm:  [ 0.00  0.00 ]
+Key: VPAVGBYrr:  [ 0.00  0.00 ]
+Key: VPAVGBZ:  [ 0.00  0.00 ]
+Key: VPAVGBZrm:  [ 0.00  0.00 ]
+Key: VPAVGBZrmk:  [ 0.00  0.00 ]
+Key: VPAVGBZrmkz:  [ 0.00  0.00 ]
+Key: VPAVGBZrr:  [ 0.00  0.00 ]
+Key: VPAVGBZrrk:  [ 0.00  0.00 ]
+Key: VPAVGBZrrkz:  [ 0.00  0.00 ]
+Key: VPAVGBrm:  [ 0.00  0.00 ]
+Key: VPAVGBrr:  [ 0.00  0.00 ]
+Key: VPAVGWYrm:  [ 0.00  0.00 ]
+Key: VPAVGWYrr:  [ 0.00  0.00 ]
+Key: VPAVGWZ:  [ 0.00  0.00 ]
+Key: VPAVGWZrm:  [ 0.00  0.00 ]
+Key: VPAVGWZrmk:  [ 0.00  0.00 ]
+Key: VPAVGWZrmkz:  [ 0.00  0.00 ]
+Key: VPAVGWZrr:  [ 0.00  0.00 ]
+Key: VPAVGWZrrk:  [ 0.00  0.00 ]
+Key: VPAVGWZrrkz:  [ 0.00  0.00 ]
+Key: VPAVGWrm:  [ 0.00  0.00 ]
+Key: VPAVGWrr:  [ 0.00  0.00 ]
+Key: VPBLENDDYrmi:  [ 0.00  0.00 ]
+Key: VPBLENDDYrri:  [ 0.00  0.00 ]
+Key: VPBLENDDrmi:  [ 0.00  0.00 ]
+Key: VPBLENDDrri:  [ 0.00  0.00 ]
+Key: VPBLENDMBZ:  [ 0.00  0.00 ]
+Key: VPBLENDMBZrm:  [ 0.00  0.00 ]
+Key: VPBLENDMBZrmk:  [ 0.00  0.00 ]
+Key: VPBLENDMBZrmkz:  [ 0.00  0.00 ]
+Key: VPBLENDMBZrr:  [ 0.00  0.00 ]
+Key: VPBLENDMBZrrk:  [ 0.00  0.00 ]
+Key: VPBLENDMBZrrkz:  [ 0.00  0.00 ]
+Key: VPBLENDMDZ:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrm:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrmb:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrmbk:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrmbkz:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrmk:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrmkz:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrr:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrrk:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrrkz:  [ 0.00  0.00 ]
+Key: VPBLENDMQZ:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrm:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrmb:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrmbk:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrmbkz:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrmk:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrmkz:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrr:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrrk:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrrkz:  [ 0.00  0.00 ]
+Key: VPBLENDMWZ:  [ 0.00  0.00 ]
+Key: VPBLENDMWZrm:  [ 0.00  0.00 ]
+Key: VPBLENDMWZrmk:  [ 0.00  0.00 ]
+Key: VPBLENDMWZrmkz:  [ 0.00  0.00 ]
+Key: VPBLENDMWZrr:  [ 0.00  0.00 ]
+Key: VPBLENDMWZrrk:  [ 0.00  0.00 ]
+Key: VPBLENDMWZrrkz:  [ 0.00  0.00 ]
+Key: VPBLENDVBYrmr:  [ 0.00  0.00 ]
+Key: VPBLENDVBYrrr:  [ 0.00  0.00 ]
+Key: VPBLENDVBrmr:  [ 0.00  0.00 ]
+Key: VPBLENDVBrrr:  [ 0.00  0.00 ]
+Key: VPBLENDWYrmi:  [ 0.00  0.00 ]
+Key: VPBLENDWYrri:  [ 0.00  0.00 ]
+Key: VPBLENDWrmi:  [ 0.00  0.00 ]
+Key: VPBLENDWrri:  [ 0.00  0.00 ]
+Key: VPBROADCASTBYrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTBYrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTBZ:  [ 0.00  0.00 ]
+Key: VPBROADCASTBZrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTBZrmk:  [ 0.00  0.00 ]
+Key: VPBROADCASTBZrmkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTBZrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTBZrrk:  [ 0.00  0.00 ]
+Key: VPBROADCASTBZrrkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTBrZ:  [ 0.00  0.00 ]
+Key: VPBROADCASTBrZrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTBrZrrk:  [ 0.00  0.00 ]
+Key: VPBROADCASTBrZrrkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTBrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTBrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTDYrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTDYrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTDZ:  [ 0.00  0.00 ]
+Key: VPBROADCASTDZrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTDZrmk:  [ 0.00  0.00 ]
+Key: VPBROADCASTDZrmkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTDZrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTDZrrk:  [ 0.00  0.00 ]
+Key: VPBROADCASTDZrrkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTDrZ:  [ 0.00  0.00 ]
+Key: VPBROADCASTDrZrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTDrZrrk:  [ 0.00  0.00 ]
+Key: VPBROADCASTDrZrrkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTDrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTDrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTMB:  [ 0.00  0.00 ]
+Key: VPBROADCASTMW:  [ 0.00  0.00 ]
+Key: VPBROADCASTQYrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTQYrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTQZ:  [ 0.00  0.00 ]
+Key: VPBROADCASTQZrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTQZrmk:  [ 0.00  0.00 ]
+Key: VPBROADCASTQZrmkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTQZrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTQZrrk:  [ 0.00  0.00 ]
+Key: VPBROADCASTQZrrkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTQrZ:  [ 0.00  0.00 ]
+Key: VPBROADCASTQrZrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTQrZrrk:  [ 0.00  0.00 ]
+Key: VPBROADCASTQrZrrkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTQrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTQrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTWYrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTWYrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTWZ:  [ 0.00  0.00 ]
+Key: VPBROADCASTWZrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTWZrmk:  [ 0.00  0.00 ]
+Key: VPBROADCASTWZrmkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTWZrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTWZrrk:  [ 0.00  0.00 ]
+Key: VPBROADCASTWZrrkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTWrZ:  [ 0.00  0.00 ]
+Key: VPBROADCASTWrZrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTWrZrrk:  [ 0.00  0.00 ]
+Key: VPBROADCASTWrZrrkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTWrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTWrr:  [ 0.00  0.00 ]
+Key: VPCLMULQDQYrmi:  [ 0.00  0.00 ]
+Key: VPCLMULQDQYrri:  [ 0.00  0.00 ]
+Key: VPCLMULQDQZ:  [ 0.00  0.00 ]
+Key: VPCLMULQDQZrmi:  [ 0.00  0.00 ]
+Key: VPCLMULQDQZrri:  [ 0.00  0.00 ]
+Key: VPCLMULQDQrmi:  [ 0.00  0.00 ]
+Key: VPCLMULQDQrri:  [ 0.00  0.00 ]
+Key: VPCMOVYrmr:  [ 0.00  0.00 ]
+Key: VPCMOVYrrm:  [ 0.00  0.00 ]
+Key: VPCMOVYrrr:  [ 0.00  0.00 ]
+Key: VPCMOVYrrr_REV:  [ 0.00  0.00 ]
+Key: VPCMOVrmr:  [ 0.00  0.00 ]
+Key: VPCMOVrrm:  [ 0.00  0.00 ]
+Key: VPCMOVrrr:  [ 0.00  0.00 ]
+Key: VPCMOVrrr_REV:  [ 0.00  0.00 ]
+Key: VPCMPBZ:  [ 0.00  0.00 ]
+Key: VPCMPBZrmi:  [ 0.00  0.00 ]
+Key: VPCMPBZrmik:  [ 0.00  0.00 ]
+Key: VPCMPBZrri:  [ 0.00  0.00 ]
+Key: VPCMPBZrrik:  [ 0.00  0.00 ]
+Key: VPCMPDZ:  [ 0.00  0.00 ]
+Key: VPCMPDZrmbi:  [ 0.00  0.00 ]
+Key: VPCMPDZrmbik:  [ 0.00  0.00 ]
+Key: VPCMPDZrmi:  [ 0.00  0.00 ]
+Key: VPCMPDZrmik:  [ 0.00  0.00 ]
+Key: VPCMPDZrri:  [ 0.00  0.00 ]
+Key: VPCMPDZrrik:  [ 0.00  0.00 ]
+Key: VPCMPEQBYrm:  [ 0.00  0.00 ]
+Key: VPCMPEQBYrr:  [ 0.00  0.00 ]
+Key: VPCMPEQBZ:  [ 0.00  0.00 ]
+Key: VPCMPEQBZrm:  [ 0.00  0.00 ]
+Key: VPCMPEQBZrmk:  [ 0.00  0.00 ]
+Key: VPCMPEQBZrr:  [ 0.00  0.00 ]
+Key: VPCMPEQBZrrk:  [ 0.00  0.00 ]
+Key: VPCMPEQBrm:  [ 0.00  0.00 ]
+Key: VPCMPEQBrr:  [ 0.00  0.00 ]
+Key: VPCMPEQDYrm:  [ 0.00  0.00 ]
+Key: VPCMPEQDYrr:  [ 0.00  0.00 ]
+Key: VPCMPEQDZ:  [ 0.00  0.00 ]
+Key: VPCMPEQDZrm:  [ 0.00  0.00 ]
+Key: VPCMPEQDZrmb:  [ 0.00  0.00 ]
+Key: VPCMPEQDZrmbk:  [ 0.00  0.00 ]
+Key: VPCMPEQDZrmk:  [ 0.00  0.00 ]
+Key: VPCMPEQDZrr:  [ 0.00  0.00 ]
+Key: VPCMPEQDZrrk:  [ 0.00  0.00 ]
+Key: VPCMPEQDrm:  [ 0.00  0.00 ]
+Key: VPCMPEQDrr:  [ 0.00  0.00 ]
+Key: VPCMPEQQYrm:  [ 0.00  0.00 ]
+Key: VPCMPEQQYrr:  [ 0.00  0.00 ]
+Key: VPCMPEQQZ:  [ 0.00  0.00 ]
+Key: VPCMPEQQZrm:  [ 0.00  0.00 ]
+Key: VPCMPEQQZrmb:  [ 0.00  0.00 ]
+Key: VPCMPEQQZrmbk:  [ 0.00  0.00 ]
+Key: VPCMPEQQZrmk:  [ 0.00  0.00 ]
+Key: VPCMPEQQZrr:  [ 0.00  0.00 ]
+Key: VPCMPEQQZrrk:  [ 0.00  0.00 ]
+Key: VPCMPEQQrm:  [ 0.00  0.00 ]
+Key: VPCMPEQQrr:  [ 0.00  0.00 ]
+Key: VPCMPEQWYrm:  [ 0.00  0.00 ]
+Key: VPCMPEQWYrr:  [ 0.00  0.00 ]
+Key: VPCMPEQWZ:  [ 0.00  0.00 ]
+Key: VPCMPEQWZrm:  [ 0.00  0.00 ]
+Key: VPCMPEQWZrmk:  [ 0.00  0.00 ]
+Key: VPCMPEQWZrr:  [ 0.00  0.00 ]
+Key: VPCMPEQWZrrk:  [ 0.00  0.00 ]
+Key: VPCMPEQWrm:  [ 0.00  0.00 ]
+Key: VPCMPEQWrr:  [ 0.00  0.00 ]
+Key: VPCMPESTRIrmi:  [ 0.00  0.00 ]
+Key: VPCMPESTRIrri:  [ 0.00  0.00 ]
+Key: VPCMPESTRMrmi:  [ 0.00  0.00 ]
+Key: VPCMPESTRMrri:  [ 0.00  0.00 ]
+Key: VPCMPGTBYrm:  [ 0.00  0.00 ]
+Key: VPCMPGTBYrr:  [ 0.00  0.00 ]
+Key: VPCMPGTBZ:  [ 0.00  0.00 ]
+Key: VPCMPGTBZrm:  [ 0.00  0.00 ]
+Key: VPCMPGTBZrmk:  [ 0.00  0.00 ]
+Key: VPCMPGTBZrr:  [ 0.00  0.00 ]
+Key: VPCMPGTBZrrk:  [ 0.00  0.00 ]
+Key: VPCMPGTBrm:  [ 0.00  0.00 ]
+Key: VPCMPGTBrr:  [ 0.00  0.00 ]
+Key: VPCMPGTDYrm:  [ 0.00  0.00 ]
+Key: VPCMPGTDYrr:  [ 0.00  0.00 ]
+Key: VPCMPGTDZ:  [ 0.00  0.00 ]
+Key: VPCMPGTDZrm:  [ 0.00  0.00 ]
+Key: VPCMPGTDZrmb:  [ 0.00  0.00 ]
+Key: VPCMPGTDZrmbk:  [ 0.00  0.00 ]
+Key: VPCMPGTDZrmk:  [ 0.00  0.00 ]
+Key: VPCMPGTDZrr:  [ 0.00  0.00 ]
+Key: VPCMPGTDZrrk:  [ 0.00  0.00 ]
+Key: VPCMPGTDrm:  [ 0.00  0.00 ]
+Key: VPCMPGTDrr:  [ 0.00  0.00 ]
+Key: VPCMPGTQYrm:  [ 0.00  0.00 ]
+Key: VPCMPGTQYrr:  [ 0.00  0.00 ]
+Key: VPCMPGTQZ:  [ 0.00  0.00 ]
+Key: VPCMPGTQZrm:  [ 0.00  0.00 ]
+Key: VPCMPGTQZrmb:  [ 0.00  0.00 ]
+Key: VPCMPGTQZrmbk:  [ 0.00  0.00 ]
+Key: VPCMPGTQZrmk:  [ 0.00  0.00 ]
+Key: VPCMPGTQZrr:  [ 0.00  0.00 ]
+Key: VPCMPGTQZrrk:  [ 0.00  0.00 ]
+Key: VPCMPGTQrm:  [ 0.00  0.00 ]
+Key: VPCMPGTQrr:  [ 0.00  0.00 ]
+Key: VPCMPGTWYrm:  [ 0.00  0.00 ]
+Key: VPCMPGTWYrr:  [ 0.00  0.00 ]
+Key: VPCMPGTWZ:  [ 0.00  0.00 ]
+Key: VPCMPGTWZrm:  [ 0.00  0.00 ]
+Key: VPCMPGTWZrmk:  [ 0.00  0.00 ]
+Key: VPCMPGTWZrr:  [ 0.00  0.00 ]
+Key: VPCMPGTWZrrk:  [ 0.00  0.00 ]
+Key: VPCMPGTWrm:  [ 0.00  0.00 ]
+Key: VPCMPGTWrr:  [ 0.00  0.00 ]
+Key: VPCMPISTRIrmi:  [ 0.00  0.00 ]
+Key: VPCMPISTRIrri:  [ 0.00  0.00 ]
+Key: VPCMPISTRMrmi:  [ 0.00  0.00 ]
+Key: VPCMPISTRMrri:  [ 0.00  0.00 ]
+Key: VPCMPQZ:  [ 0.00  0.00 ]
+Key: VPCMPQZrmbi:  [ 0.00  0.00 ]
+Key: VPCMPQZrmbik:  [ 0.00  0.00 ]
+Key: VPCMPQZrmi:  [ 0.00  0.00 ]
+Key: VPCMPQZrmik:  [ 0.00  0.00 ]
+Key: VPCMPQZrri:  [ 0.00  0.00 ]
+Key: VPCMPQZrrik:  [ 0.00  0.00 ]
+Key: VPCMPUBZ:  [ 0.00  0.00 ]
+Key: VPCMPUBZrmi:  [ 0.00  0.00 ]
+Key: VPCMPUBZrmik:  [ 0.00  0.00 ]
+Key: VPCMPUBZrri:  [ 0.00  0.00 ]
+Key: VPCMPUBZrrik:  [ 0.00  0.00 ]
+Key: VPCMPUDZ:  [ 0.00  0.00 ]
+Key: VPCMPUDZrmbi:  [ 0.00  0.00 ]
+Key: VPCMPUDZrmbik:  [ 0.00  0.00 ]
+Key: VPCMPUDZrmi:  [ 0.00  0.00 ]
+Key: VPCMPUDZrmik:  [ 0.00  0.00 ]
+Key: VPCMPUDZrri:  [ 0.00  0.00 ]
+Key: VPCMPUDZrrik:  [ 0.00  0.00 ]
+Key: VPCMPUQZ:  [ 0.00  0.00 ]
+Key: VPCMPUQZrmbi:  [ 0.00  0.00 ]
+Key: VPCMPUQZrmbik:  [ 0.00  0.00 ]
+Key: VPCMPUQZrmi:  [ 0.00  0.00 ]
+Key: VPCMPUQZrmik:  [ 0.00  0.00 ]
+Key: VPCMPUQZrri:  [ 0.00  0.00 ]
+Key: VPCMPUQZrrik:  [ 0.00  0.00 ]
+Key: VPCMPUWZ:  [ 0.00  0.00 ]
+Key: VPCMPUWZrmi:  [ 0.00  0.00 ]
+Key: VPCMPUWZrmik:  [ 0.00  0.00 ]
+Key: VPCMPUWZrri:  [ 0.00  0.00 ]
+Key: VPCMPUWZrrik:  [ 0.00  0.00 ]
+Key: VPCMPWZ:  [ 0.00  0.00 ]
+Key: VPCMPWZrmi:  [ 0.00  0.00 ]
+Key: VPCMPWZrmik:  [ 0.00  0.00 ]
+Key: VPCMPWZrri:  [ 0.00  0.00 ]
+Key: VPCMPWZrrik:  [ 0.00  0.00 ]
+Key: VPCOMBmi:  [ 0.00  0.00 ]
+Key: VPCOMBri:  [ 0.00  0.00 ]
+Key: VPCOMDmi:  [ 0.00  0.00 ]
+Key: VPCOMDri:  [ 0.00  0.00 ]
+Key: VPCOMPRESSBZ:  [ 0.00  0.00 ]
+Key: VPCOMPRESSBZmr:  [ 0.00  0.00 ]
+Key: VPCOMPRESSBZmrk:  [ 0.00  0.00 ]
+Key: VPCOMPRESSBZrr:  [ 0.00  0.00 ]
+Key: VPCOMPRESSBZrrk:  [ 0.00  0.00 ]
+Key: VPCOMPRESSBZrrkz:  [ 0.00  0.00 ]
+Key: VPCOMPRESSDZ:  [ 0.00  0.00 ]
+Key: VPCOMPRESSDZmr:  [ 0.00  0.00 ]
+Key: VPCOMPRESSDZmrk:  [ 0.00  0.00 ]
+Key: VPCOMPRESSDZrr:  [ 0.00  0.00 ]
+Key: VPCOMPRESSDZrrk:  [ 0.00  0.00 ]
+Key: VPCOMPRESSDZrrkz:  [ 0.00  0.00 ]
+Key: VPCOMPRESSQZ:  [ 0.00  0.00 ]
+Key: VPCOMPRESSQZmr:  [ 0.00  0.00 ]
+Key: VPCOMPRESSQZmrk:  [ 0.00  0.00 ]
+Key: VPCOMPRESSQZrr:  [ 0.00  0.00 ]
+Key: VPCOMPRESSQZrrk:  [ 0.00  0.00 ]
+Key: VPCOMPRESSQZrrkz:  [ 0.00  0.00 ]
+Key: VPCOMPRESSWZ:  [ 0.00  0.00 ]
+Key: VPCOMPRESSWZmr:  [ 0.00  0.00 ]
+Key: VPCOMPRESSWZmrk:  [ 0.00  0.00 ]
+Key: VPCOMPRESSWZrr:  [ 0.00  0.00 ]
+Key: VPCOMPRESSWZrrk:  [ 0.00  0.00 ]
+Key: VPCOMPRESSWZrrkz:  [ 0.00  0.00 ]
+Key: VPCOMQmi:  [ 0.00  0.00 ]
+Key: VPCOMQri:  [ 0.00  0.00 ]
+Key: VPCOMUBmi:  [ 0.00  0.00 ]
+Key: VPCOMUBri:  [ 0.00  0.00 ]
+Key: VPCOMUDmi:  [ 0.00  0.00 ]
+Key: VPCOMUDri:  [ 0.00  0.00 ]
+Key: VPCOMUQmi:  [ 0.00  0.00 ]
+Key: VPCOMUQri:  [ 0.00  0.00 ]
+Key: VPCOMUWmi:  [ 0.00  0.00 ]
+Key: VPCOMUWri:  [ 0.00  0.00 ]
+Key: VPCOMWmi:  [ 0.00  0.00 ]
+Key: VPCOMWri:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZ:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrm:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrmb:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrmbk:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrmbkz:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrmk:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrmkz:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrr:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrrk:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrrkz:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZ:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrm:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrmb:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrmbk:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrmbkz:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrmk:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrmkz:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrr:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrrk:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBSSDSYrm:  [ 0.00  0.00 ]
+Key: VPDPBSSDSYrr:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZ:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrm:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrmb:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrmbk:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrmk:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrmkz:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrr:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrrk:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBSSDSrm:  [ 0.00  0.00 ]
+Key: VPDPBSSDSrr:  [ 0.00  0.00 ]
+Key: VPDPBSSDYrm:  [ 0.00  0.00 ]
+Key: VPDPBSSDYrr:  [ 0.00  0.00 ]
+Key: VPDPBSSDZ:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrm:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrmb:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrmbk:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrmk:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrmkz:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrr:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrrk:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBSSDrm:  [ 0.00  0.00 ]
+Key: VPDPBSSDrr:  [ 0.00  0.00 ]
+Key: VPDPBSUDSYrm:  [ 0.00  0.00 ]
+Key: VPDPBSUDSYrr:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZ:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrm:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrmb:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrmbk:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrmk:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrmkz:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrr:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrrk:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBSUDSrm:  [ 0.00  0.00 ]
+Key: VPDPBSUDSrr:  [ 0.00  0.00 ]
+Key: VPDPBSUDYrm:  [ 0.00  0.00 ]
+Key: VPDPBSUDYrr:  [ 0.00  0.00 ]
+Key: VPDPBSUDZ:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrm:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrmb:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrmbk:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrmk:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrmkz:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrr:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrrk:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBSUDrm:  [ 0.00  0.00 ]
+Key: VPDPBSUDrr:  [ 0.00  0.00 ]
+Key: VPDPBUSDSYrm:  [ 0.00  0.00 ]
+Key: VPDPBUSDSYrr:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZ:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrm:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrmb:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrmbk:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrmk:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrmkz:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrr:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrrk:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBUSDSrm:  [ 0.00  0.00 ]
+Key: VPDPBUSDSrr:  [ 0.00  0.00 ]
+Key: VPDPBUSDYrm:  [ 0.00  0.00 ]
+Key: VPDPBUSDYrr:  [ 0.00  0.00 ]
+Key: VPDPBUSDZ:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrm:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrmb:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrmbk:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrmk:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrmkz:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrr:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrrk:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBUSDrm:  [ 0.00  0.00 ]
+Key: VPDPBUSDrr:  [ 0.00  0.00 ]
+Key: VPDPBUUDSYrm:  [ 0.00  0.00 ]
+Key: VPDPBUUDSYrr:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZ:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrm:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrmb:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrmbk:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrmk:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrmkz:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrr:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrrk:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBUUDSrm:  [ 0.00  0.00 ]
+Key: VPDPBUUDSrr:  [ 0.00  0.00 ]
+Key: VPDPBUUDYrm:  [ 0.00  0.00 ]
+Key: VPDPBUUDYrr:  [ 0.00  0.00 ]
+Key: VPDPBUUDZ:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrm:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrmb:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrmbk:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrmk:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrmkz:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrr:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrrk:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBUUDrm:  [ 0.00  0.00 ]
+Key: VPDPBUUDrr:  [ 0.00  0.00 ]
+Key: VPDPWSSDSYrm:  [ 0.00  0.00 ]
+Key: VPDPWSSDSYrr:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZ:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrm:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrmb:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrmbk:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrmk:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrmkz:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrr:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrrk:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrrkz:  [ 0.00  0.00 ]
+Key: VPDPWSSDSrm:  [ 0.00  0.00 ]
+Key: VPDPWSSDSrr:  [ 0.00  0.00 ]
+Key: VPDPWSSDYrm:  [ 0.00  0.00 ]
+Key: VPDPWSSDYrr:  [ 0.00  0.00 ]
+Key: VPDPWSSDZ:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrm:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrmb:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrmbk:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrmk:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrmkz:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrr:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrrk:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrrkz:  [ 0.00  0.00 ]
+Key: VPDPWSSDrm:  [ 0.00  0.00 ]
+Key: VPDPWSSDrr:  [ 0.00  0.00 ]
+Key: VPDPWSUDSYrm:  [ 0.00  0.00 ]
+Key: VPDPWSUDSYrr:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZ:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrm:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrmb:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrmbk:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrmk:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrmkz:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrr:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrrk:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrrkz:  [ 0.00  0.00 ]
+Key: VPDPWSUDSrm:  [ 0.00  0.00 ]
+Key: VPDPWSUDSrr:  [ 0.00  0.00 ]
+Key: VPDPWSUDYrm:  [ 0.00  0.00 ]
+Key: VPDPWSUDYrr:  [ 0.00  0.00 ]
+Key: VPDPWSUDZ:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrm:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrmb:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrmbk:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrmk:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrmkz:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrr:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrrk:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrrkz:  [ 0.00  0.00 ]
+Key: VPDPWSUDrm:  [ 0.00  0.00 ]
+Key: VPDPWSUDrr:  [ 0.00  0.00 ]
+Key: VPDPWUSDSYrm:  [ 0.00  0.00 ]
+Key: VPDPWUSDSYrr:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZ:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrm:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrmb:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrmbk:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrmk:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrmkz:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrr:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrrk:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrrkz:  [ 0.00  0.00 ]
+Key: VPDPWUSDSrm:  [ 0.00  0.00 ]
+Key: VPDPWUSDSrr:  [ 0.00  0.00 ]
+Key: VPDPWUSDYrm:  [ 0.00  0.00 ]
+Key: VPDPWUSDYrr:  [ 0.00  0.00 ]
+Key: VPDPWUSDZ:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrm:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrmb:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrmbk:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrmk:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrmkz:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrr:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrrk:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrrkz:  [ 0.00  0.00 ]
+Key: VPDPWUSDrm:  [ 0.00  0.00 ]
+Key: VPDPWUSDrr:  [ 0.00  0.00 ]
+Key: VPDPWUUDSYrm:  [ 0.00  0.00 ]
+Key: VPDPWUUDSYrr:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZ:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrm:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrmb:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrmbk:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrmk:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrmkz:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrr:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrrk:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrrkz:  [ 0.00  0.00 ]
+Key: VPDPWUUDSrm:  [ 0.00  0.00 ]
+Key: VPDPWUUDSrr:  [ 0.00  0.00 ]
+Key: VPDPWUUDYrm:  [ 0.00  0.00 ]
+Key: VPDPWUUDYrr:  [ 0.00  0.00 ]
+Key: VPDPWUUDZ:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrm:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrmb:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrmbk:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrmk:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrmkz:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrr:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrrk:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrrkz:  [ 0.00  0.00 ]
+Key: VPDPWUUDrm:  [ 0.00  0.00 ]
+Key: VPDPWUUDrr:  [ 0.00  0.00 ]
+Key: VPERM:  [ 0.00  0.00 ]
+Key: VPERMBZ:  [ 0.00  0.00 ]
+Key: VPERMBZrm:  [ 0.00  0.00 ]
+Key: VPERMBZrmk:  [ 0.00  0.00 ]
+Key: VPERMBZrmkz:  [ 0.00  0.00 ]
+Key: VPERMBZrr:  [ 0.00  0.00 ]
+Key: VPERMBZrrk:  [ 0.00  0.00 ]
+Key: VPERMBZrrkz:  [ 0.00  0.00 ]
+Key: VPERMDYrm:  [ 0.00  0.00 ]
+Key: VPERMDYrr:  [ 0.00  0.00 ]
+Key: VPERMDZ:  [ 0.00  0.00 ]
+Key: VPERMDZrm:  [ 0.00  0.00 ]
+Key: VPERMDZrmb:  [ 0.00  0.00 ]
+Key: VPERMDZrmbk:  [ 0.00  0.00 ]
+Key: VPERMDZrmbkz:  [ 0.00  0.00 ]
+Key: VPERMDZrmk:  [ 0.00  0.00 ]
+Key: VPERMDZrmkz:  [ 0.00  0.00 ]
+Key: VPERMDZrr:  [ 0.00  0.00 ]
+Key: VPERMDZrrk:  [ 0.00  0.00 ]
+Key: VPERMDZrrkz:  [ 0.00  0.00 ]
+Key: VPERMI:  [ 0.00  0.00 ]
+Key: VPERMIL:  [ 0.00  0.00 ]
+Key: VPERMILPDYmi:  [ 0.00  0.00 ]
+Key: VPERMILPDYri:  [ 0.00  0.00 ]
+Key: VPERMILPDYrm:  [ 0.00  0.00 ]
+Key: VPERMILPDYrr:  [ 0.00  0.00 ]
+Key: VPERMILPDZ:  [ 0.00  0.00 ]
+Key: VPERMILPDZmbi:  [ 0.00  0.00 ]
+Key: VPERMILPDZmbik:  [ 0.00  0.00 ]
+Key: VPERMILPDZmbikz:  [ 0.00  0.00 ]
+Key: VPERMILPDZmi:  [ 0.00  0.00 ]
+Key: VPERMILPDZmik:  [ 0.00  0.00 ]
+Key: VPERMILPDZmikz:  [ 0.00  0.00 ]
+Key: VPERMILPDZri:  [ 0.00  0.00 ]
+Key: VPERMILPDZrik:  [ 0.00  0.00 ]
+Key: VPERMILPDZrikz:  [ 0.00  0.00 ]
+Key: VPERMILPDZrm:  [ 0.00  0.00 ]
+Key: VPERMILPDZrmb:  [ 0.00  0.00 ]
+Key: VPERMILPDZrmbk:  [ 0.00  0.00 ]
+Key: VPERMILPDZrmbkz:  [ 0.00  0.00 ]
+Key: VPERMILPDZrmk:  [ 0.00  0.00 ]
+Key: VPERMILPDZrmkz:  [ 0.00  0.00 ]
+Key: VPERMILPDZrr:  [ 0.00  0.00 ]
+Key: VPERMILPDZrrk:  [ 0.00  0.00 ]
+Key: VPERMILPDZrrkz:  [ 0.00  0.00 ]
+Key: VPERMILPDmi:  [ 0.00  0.00 ]
+Key: VPERMILPDri:  [ 0.00  0.00 ]
+Key: VPERMILPDrm:  [ 0.00  0.00 ]
+Key: VPERMILPDrr:  [ 0.00  0.00 ]
+Key: VPERMILPSYmi:  [ 0.00  0.00 ]
+Key: VPERMILPSYri:  [ 0.00  0.00 ]
+Key: VPERMILPSYrm:  [ 0.00  0.00 ]
+Key: VPERMILPSYrr:  [ 0.00  0.00 ]
+Key: VPERMILPSZ:  [ 0.00  0.00 ]
+Key: VPERMILPSZmbi:  [ 0.00  0.00 ]
+Key: VPERMILPSZmbik:  [ 0.00  0.00 ]
+Key: VPERMILPSZmbikz:  [ 0.00  0.00 ]
+Key: VPERMILPSZmi:  [ 0.00  0.00 ]
+Key: VPERMILPSZmik:  [ 0.00  0.00 ]
+Key: VPERMILPSZmikz:  [ 0.00  0.00 ]
+Key: VPERMILPSZri:  [ 0.00  0.00 ]
+Key: VPERMILPSZrik:  [ 0.00  0.00 ]
+Key: VPERMILPSZrikz:  [ 0.00  0.00 ]
+Key: VPERMILPSZrm:  [ 0.00  0.00 ]
+Key: VPERMILPSZrmb:  [ 0.00  0.00 ]
+Key: VPERMILPSZrmbk:  [ 0.00  0.00 ]
+Key: VPERMILPSZrmbkz:  [ 0.00  0.00 ]
+Key: VPERMILPSZrmk:  [ 0.00  0.00 ]
+Key: VPERMILPSZrmkz:  [ 0.00  0.00 ]
+Key: VPERMILPSZrr:  [ 0.00  0.00 ]
+Key: VPERMILPSZrrk:  [ 0.00  0.00 ]
+Key: VPERMILPSZrrkz:  [ 0.00  0.00 ]
+Key: VPERMILPSmi:  [ 0.00  0.00 ]
+Key: VPERMILPSri:  [ 0.00  0.00 ]
+Key: VPERMILPSrm:  [ 0.00  0.00 ]
+Key: VPERMILPSrr:  [ 0.00  0.00 ]
+Key: VPERMPDYmi:  [ 0.00  0.00 ]
+Key: VPERMPDYri:  [ 0.00  0.00 ]
+Key: VPERMPDZ:  [ 0.00  0.00 ]
+Key: VPERMPDZmbi:  [ 0.00  0.00 ]
+Key: VPERMPDZmbik:  [ 0.00  0.00 ]
+Key: VPERMPDZmbikz:  [ 0.00  0.00 ]
+Key: VPERMPDZmi:  [ 0.00  0.00 ]
+Key: VPERMPDZmik:  [ 0.00  0.00 ]
+Key: VPERMPDZmikz:  [ 0.00  0.00 ]
+Key: VPERMPDZri:  [ 0.00  0.00 ]
+Key: VPERMPDZrik:  [ 0.00  0.00 ]
+Key: VPERMPDZrikz:  [ 0.00  0.00 ]
+Key: VPERMPDZrm:  [ 0.00  0.00 ]
+Key: VPERMPDZrmb:  [ 0.00  0.00 ]
+Key: VPERMPDZrmbk:  [ 0.00  0.00 ]
+Key: VPERMPDZrmbkz:  [ 0.00  0.00 ]
+Key: VPERMPDZrmk:  [ 0.00  0.00 ]
+Key: VPERMPDZrmkz:  [ 0.00  0.00 ]
+Key: VPERMPDZrr:  [ 0.00  0.00 ]
+Key: VPERMPDZrrk:  [ 0.00  0.00 ]
+Key: VPERMPDZrrkz:  [ 0.00  0.00 ]
+Key: VPERMPSYrm:  [ 0.00  0.00 ]
+Key: VPERMPSYrr:  [ 0.00  0.00 ]
+Key: VPERMPSZ:  [ 0.00  0.00 ]
+Key: VPERMPSZrm:  [ 0.00  0.00 ]
+Key: VPERMPSZrmb:  [ 0.00  0.00 ]
+Key: VPERMPSZrmbk:  [ 0.00  0.00 ]
+Key: VPERMPSZrmbkz:  [ 0.00  0.00 ]
+Key: VPERMPSZrmk:  [ 0.00  0.00 ]
+Key: VPERMPSZrmkz:  [ 0.00  0.00 ]
+Key: VPERMPSZrr:  [ 0.00  0.00 ]
+Key: VPERMPSZrrk:  [ 0.00  0.00 ]
+Key: VPERMPSZrrkz:  [ 0.00  0.00 ]
+Key: VPERMQYmi:  [ 0.00  0.00 ]
+Key: VPERMQYri:  [ 0.00  0.00 ]
+Key: VPERMQZ:  [ 0.00  0.00 ]
+Key: VPERMQZmbi:  [ 0.00  0.00 ]
+Key: VPERMQZmbik:  [ 0.00  0.00 ]
+Key: VPERMQZmbikz:  [ 0.00  0.00 ]
+Key: VPERMQZmi:  [ 0.00  0.00 ]
+Key: VPERMQZmik:  [ 0.00  0.00 ]
+Key: VPERMQZmikz:  [ 0.00  0.00 ]
+Key: VPERMQZri:  [ 0.00  0.00 ]
+Key: VPERMQZrik:  [ 0.00  0.00 ]
+Key: VPERMQZrikz:  [ 0.00  0.00 ]
+Key: VPERMQZrm:  [ 0.00  0.00 ]
+Key: VPERMQZrmb:  [ 0.00  0.00 ]
+Key: VPERMQZrmbk:  [ 0.00  0.00 ]
+Key: VPERMQZrmbkz:  [ 0.00  0.00 ]
+Key: VPERMQZrmk:  [ 0.00  0.00 ]
+Key: VPERMQZrmkz:  [ 0.00  0.00 ]
+Key: VPERMQZrr:  [ 0.00  0.00 ]
+Key: VPERMQZrrk:  [ 0.00  0.00 ]
+Key: VPERMQZrrkz:  [ 0.00  0.00 ]
+Key: VPERMT:  [ 0.00  0.00 ]
+Key: VPERMWZ:  [ 0.00  0.00 ]
+Key: VPERMWZrm:  [ 0.00  0.00 ]
+Key: VPERMWZrmk:  [ 0.00  0.00 ]
+Key: VPERMWZrmkz:  [ 0.00  0.00 ]
+Key: VPERMWZrr:  [ 0.00  0.00 ]
+Key: VPERMWZrrk:  [ 0.00  0.00 ]
+Key: VPERMWZrrkz:  [ 0.00  0.00 ]
+Key: VPEXPANDBZ:  [ 0.00  0.00 ]
+Key: VPEXPANDBZrm:  [ 0.00  0.00 ]
+Key: VPEXPANDBZrmk:  [ 0.00  0.00 ]
+Key: VPEXPANDBZrmkz:  [ 0.00  0.00 ]
+Key: VPEXPANDBZrr:  [ 0.00  0.00 ]
+Key: VPEXPANDBZrrk:  [ 0.00  0.00 ]
+Key: VPEXPANDBZrrkz:  [ 0.00  0.00 ]
+Key: VPEXPANDDZ:  [ 0.00  0.00 ]
+Key: VPEXPANDDZrm:  [ 0.00  0.00 ]
+Key: VPEXPANDDZrmk:  [ 0.00  0.00 ]
+Key: VPEXPANDDZrmkz:  [ 0.00  0.00 ]
+Key: VPEXPANDDZrr:  [ 0.00  0.00 ]
+Key: VPEXPANDDZrrk:  [ 0.00  0.00 ]
+Key: VPEXPANDDZrrkz:  [ 0.00  0.00 ]
+Key: VPEXPANDQZ:  [ 0.00  0.00 ]
+Key: VPEXPANDQZrm:  [ 0.00  0.00 ]
+Key: VPEXPANDQZrmk:  [ 0.00  0.00 ]
+Key: VPEXPANDQZrmkz:  [ 0.00  0.00 ]
+Key: VPEXPANDQZrr:  [ 0.00  0.00 ]
+Key: VPEXPANDQZrrk:  [ 0.00  0.00 ]
+Key: VPEXPANDQZrrkz:  [ 0.00  0.00 ]
+Key: VPEXPANDWZ:  [ 0.00  0.00 ]
+Key: VPEXPANDWZrm:  [ 0.00  0.00 ]
+Key: VPEXPANDWZrmk:  [ 0.00  0.00 ]
+Key: VPEXPANDWZrmkz:  [ 0.00  0.00 ]
+Key: VPEXPANDWZrr:  [ 0.00  0.00 ]
+Key: VPEXPANDWZrrk:  [ 0.00  0.00 ]
+Key: VPEXPANDWZrrkz:  [ 0.00  0.00 ]
+Key: VPEXTRBZmri:  [ 0.00  0.00 ]
+Key: VPEXTRBZrri:  [ 0.00  0.00 ]
+Key: VPEXTRBmri:  [ 0.00  0.00 ]
+Key: VPEXTRBrri:  [ 0.00  0.00 ]
+Key: VPEXTRDZmri:  [ 0.00  0.00 ]
+Key: VPEXTRDZrri:  [ 0.00  0.00 ]
+Key: VPEXTRDmri:  [ 0.00  0.00 ]
+Key: VPEXTRDrri:  [ 0.00  0.00 ]
+Key: VPEXTRQZmri:  [ 0.00  0.00 ]
+Key: VPEXTRQZrri:  [ 0.00  0.00 ]
+Key: VPEXTRQmri:  [ 0.00  0.00 ]
+Key: VPEXTRQrri:  [ 0.00  0.00 ]
+Key: VPEXTRWZmri:  [ 0.00  0.00 ]
+Key: VPEXTRWZrri:  [ 0.00  0.00 ]
+Key: VPEXTRWZrri_REV:  [ 0.00  0.00 ]
+Key: VPEXTRWmri:  [ 0.00  0.00 ]
+Key: VPEXTRWrri:  [ 0.00  0.00 ]
+Key: VPEXTRWrri_REV:  [ 0.00  0.00 ]
+Key: VPGATHERDDYrm:  [ 0.00  0.00 ]
+Key: VPGATHERDDZ:  [ 0.00  0.00 ]
+Key: VPGATHERDDZrm:  [ 0.00  0.00 ]
+Key: VPGATHERDDrm:  [ 0.00  0.00 ]
+Key: VPGATHERDQYrm:  [ 0.00  0.00 ]
+Key: VPGATHERDQZ:  [ 0.00  0.00 ]
+Key: VPGATHERDQZrm:  [ 0.00  0.00 ]
+Key: VPGATHERDQrm:  [ 0.00  0.00 ]
+Key: VPGATHERQDYrm:  [ 0.00  0.00 ]
+Key: VPGATHERQDZ:  [ 0.00  0.00 ]
+Key: VPGATHERQDZrm:  [ 0.00  0.00 ]
+Key: VPGATHERQDrm:  [ 0.00  0.00 ]
+Key: VPGATHERQQYrm:  [ 0.00  0.00 ]
+Key: VPGATHERQQZ:  [ 0.00  0.00 ]
+Key: VPGATHERQQZrm:  [ 0.00  0.00 ]
+Key: VPGATHERQQrm:  [ 0.00  0.00 ]
+Key: VPHADDBDrm:  [ 0.00  0.00 ]
+Key: VPHADDBDrr:  [ 0.00  0.00 ]
+Key: VPHADDBQrm:  [ 0.00  0.00 ]
+Key: VPHADDBQrr:  [ 0.00  0.00 ]
+Key: VPHADDBWrm:  [ 0.00  0.00 ]
+Key: VPHADDBWrr:  [ 0.00  0.00 ]
+Key: VPHADDDQrm:  [ 0.00  0.00 ]
+Key: VPHADDDQrr:  [ 0.00  0.00 ]
+Key: VPHADDDYrm:  [ 0.00  0.00 ]
+Key: VPHADDDYrr:  [ 0.00  0.00 ]
+Key: VPHADDDrm:  [ 0.00  0.00 ]
+Key: VPHADDDrr:  [ 0.00  0.00 ]
+Key: VPHADDSWYrm:  [ 0.00  0.00 ]
+Key: VPHADDSWYrr:  [ 0.00  0.00 ]
+Key: VPHADDSWrm:  [ 0.00  0.00 ]
+Key: VPHADDSWrr:  [ 0.00  0.00 ]
+Key: VPHADDUBDrm:  [ 0.00  0.00 ]
+Key: VPHADDUBDrr:  [ 0.00  0.00 ]
+Key: VPHADDUBQrm:  [ 0.00  0.00 ]
+Key: VPHADDUBQrr:  [ 0.00  0.00 ]
+Key: VPHADDUBWrm:  [ 0.00  0.00 ]
+Key: VPHADDUBWrr:  [ 0.00  0.00 ]
+Key: VPHADDUDQrm:  [ 0.00  0.00 ]
+Key: VPHADDUDQrr:  [ 0.00  0.00 ]
+Key: VPHADDUWDrm:  [ 0.00  0.00 ]
+Key: VPHADDUWDrr:  [ 0.00  0.00 ]
+Key: VPHADDUWQrm:  [ 0.00  0.00 ]
+Key: VPHADDUWQrr:  [ 0.00  0.00 ]
+Key: VPHADDWDrm:  [ 0.00  0.00 ]
+Key: VPHADDWDrr:  [ 0.00  0.00 ]
+Key: VPHADDWQrm:  [ 0.00  0.00 ]
+Key: VPHADDWQrr:  [ 0.00  0.00 ]
+Key: VPHADDWYrm:  [ 0.00  0.00 ]
+Key: VPHADDWYrr:  [ 0.00  0.00 ]
+Key: VPHADDWrm:  [ 0.00  0.00 ]
+Key: VPHADDWrr:  [ 0.00  0.00 ]
+Key: VPHMINPOSUWrm:  [ 0.00  0.00 ]
+Key: VPHMINPOSUWrr:  [ 0.00  0.00 ]
+Key: VPHSUBBWrm:  [ 0.00  0.00 ]
+Key: VPHSUBBWrr:  [ 0.00  0.00 ]
+Key: VPHSUBDQrm:  [ 0.00  0.00 ]
+Key: VPHSUBDQrr:  [ 0.00  0.00 ]
+Key: VPHSUBDYrm:  [ 0.00  0.00 ]
+Key: VPHSUBDYrr:  [ 0.00  0.00 ]
+Key: VPHSUBDrm:  [ 0.00  0.00 ]
+Key: VPHSUBDrr:  [ 0.00  0.00 ]
+Key: VPHSUBSWYrm:  [ 0.00  0.00 ]
+Key: VPHSUBSWYrr:  [ 0.00  0.00 ]
+Key: VPHSUBSWrm:  [ 0.00  0.00 ]
+Key: VPHSUBSWrr:  [ 0.00  0.00 ]
+Key: VPHSUBWDrm:  [ 0.00  0.00 ]
+Key: VPHSUBWDrr:  [ 0.00  0.00 ]
+Key: VPHSUBWYrm:  [ 0.00  0.00 ]
+Key: VPHSUBWYrr:  [ 0.00  0.00 ]
+Key: VPHSUBWrm:  [ 0.00  0.00 ]
+Key: VPHSUBWrr:  [ 0.00  0.00 ]
+Key: VPINSRBZrmi:  [ 0.00  0.00 ]
+Key: VPINSRBZrri:  [ 0.00  0.00 ]
+Key: VPINSRBrmi:  [ 0.00  0.00 ]
+Key: VPINSRBrri:  [ 0.00  0.00 ]
+Key: VPINSRDZrmi:  [ 0.00  0.00 ]
+Key: VPINSRDZrri:  [ 0.00  0.00 ]
+Key: VPINSRDrmi:  [ 0.00  0.00 ]
+Key: VPINSRDrri:  [ 0.00  0.00 ]
+Key: VPINSRQZrmi:  [ 0.00  0.00 ]
+Key: VPINSRQZrri:  [ 0.00  0.00 ]
+Key: VPINSRQrmi:  [ 0.00  0.00 ]
+Key: VPINSRQrri:  [ 0.00  0.00 ]
+Key: VPINSRWZrmi:  [ 0.00  0.00 ]
+Key: VPINSRWZrri:  [ 0.00  0.00 ]
+Key: VPINSRWrmi:  [ 0.00  0.00 ]
+Key: VPINSRWrri:  [ 0.00  0.00 ]
+Key: VPLZCNTDZ:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrm:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrmb:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrmbk:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrmbkz:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrmk:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrmkz:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrr:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrrk:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrrkz:  [ 0.00  0.00 ]
+Key: VPLZCNTQZ:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrm:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrmb:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrmbk:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrmbkz:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrmk:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrmkz:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrr:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrrk:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrrkz:  [ 0.00  0.00 ]
+Key: VPMACSDDrm:  [ 0.00  0.00 ]
+Key: VPMACSDDrr:  [ 0.00  0.00 ]
+Key: VPMACSDQHrm:  [ 0.00  0.00 ]
+Key: VPMACSDQHrr:  [ 0.00  0.00 ]
+Key: VPMACSDQLrm:  [ 0.00  0.00 ]
+Key: VPMACSDQLrr:  [ 0.00  0.00 ]
+Key: VPMACSSDDrm:  [ 0.00  0.00 ]
+Key: VPMACSSDDrr:  [ 0.00  0.00 ]
+Key: VPMACSSDQHrm:  [ 0.00  0.00 ]
+Key: VPMACSSDQHrr:  [ 0.00  0.00 ]
+Key: VPMACSSDQLrm:  [ 0.00  0.00 ]
+Key: VPMACSSDQLrr:  [ 0.00  0.00 ]
+Key: VPMACSSWDrm:  [ 0.00  0.00 ]
+Key: VPMACSSWDrr:  [ 0.00  0.00 ]
+Key: VPMACSSWWrm:  [ 0.00  0.00 ]
+Key: VPMACSSWWrr:  [ 0.00  0.00 ]
+Key: VPMACSWDrm:  [ 0.00  0.00 ]
+Key: VPMACSWDrr:  [ 0.00  0.00 ]
+Key: VPMACSWWrm:  [ 0.00  0.00 ]
+Key: VPMACSWWrr:  [ 0.00  0.00 ]
+Key: VPMADCSSWDrm:  [ 0.00  0.00 ]
+Key: VPMADCSSWDrr:  [ 0.00  0.00 ]
+Key: VPMADCSWDrm:  [ 0.00  0.00 ]
+Key: VPMADCSWDrr:  [ 0.00  0.00 ]
+Key: VPMADD:  [ 0.00  0.00 ]
+Key: VPMADDUBSWYrm:  [ 0.00  0.00 ]
+Key: VPMADDUBSWYrr:  [ 0.00  0.00 ]
+Key: VPMADDUBSWZ:  [ 0.00  0.00 ]
+Key: VPMADDUBSWZrm:  [ 0.00  0.00 ]
+Key: VPMADDUBSWZrmk:  [ 0.00  0.00 ]
+Key: VPMADDUBSWZrmkz:  [ 0.00  0.00 ]
+Key: VPMADDUBSWZrr:  [ 0.00  0.00 ]
+Key: VPMADDUBSWZrrk:  [ 0.00  0.00 ]
+Key: VPMADDUBSWZrrkz:  [ 0.00  0.00 ]
+Key: VPMADDUBSWrm:  [ 0.00  0.00 ]
+Key: VPMADDUBSWrr:  [ 0.00  0.00 ]
+Key: VPMADDWDYrm:  [ 0.00  0.00 ]
+Key: VPMADDWDYrr:  [ 0.00  0.00 ]
+Key: VPMADDWDZ:  [ 0.00  0.00 ]
+Key: VPMADDWDZrm:  [ 0.00  0.00 ]
+Key: VPMADDWDZrmk:  [ 0.00  0.00 ]
+Key: VPMADDWDZrmkz:  [ 0.00  0.00 ]
+Key: VPMADDWDZrr:  [ 0.00  0.00 ]
+Key: VPMADDWDZrrk:  [ 0.00  0.00 ]
+Key: VPMADDWDZrrkz:  [ 0.00  0.00 ]
+Key: VPMADDWDrm:  [ 0.00  0.00 ]
+Key: VPMADDWDrr:  [ 0.00  0.00 ]
+Key: VPMASKMOVDYmr:  [ 0.00  0.00 ]
+Key: VPMASKMOVDYrm:  [ 0.00  0.00 ]
+Key: VPMASKMOVDmr:  [ 0.00  0.00 ]
+Key: VPMASKMOVDrm:  [ 0.00  0.00 ]
+Key: VPMASKMOVQYmr:  [ 0.00  0.00 ]
+Key: VPMASKMOVQYrm:  [ 0.00  0.00 ]
+Key: VPMASKMOVQmr:  [ 0.00  0.00 ]
+Key: VPMASKMOVQrm:  [ 0.00  0.00 ]
+Key: VPMAXSBYrm:  [ 0.00  0.00 ]
+Key: VPMAXSBYrr:  [ 0.00  0.00 ]
+Key: VPMAXSBZ:  [ 0.00  0.00 ]
+Key: VPMAXSBZrm:  [ 0.00  0.00 ]
+Key: VPMAXSBZrmk:  [ 0.00  0.00 ]
+Key: VPMAXSBZrmkz:  [ 0.00  0.00 ]
+Key: VPMAXSBZrr:  [ 0.00  0.00 ]
+Key: VPMAXSBZrrk:  [ 0.00  0.00 ]
+Key: VPMAXSBZrrkz:  [ 0.00  0.00 ]
+Key: VPMAXSBrm:  [ 0.00  0.00 ]
+Key: VPMAXSBrr:  [ 0.00  0.00 ]
+Key: VPMAXSDYrm:  [ 0.00  0.00 ]
+Key: VPMAXSDYrr:  [ 0.00  0.00 ]
+Key: VPMAXSDZ:  [ 0.00  0.00 ]
+Key: VPMAXSDZrm:  [ 0.00  0.00 ]
+Key: VPMAXSDZrmb:  [ 0.00  0.00 ]
+Key: VPMAXSDZrmbk:  [ 0.00  0.00 ]
+Key: VPMAXSDZrmbkz:  [ 0.00  0.00 ]
+Key: VPMAXSDZrmk:  [ 0.00  0.00 ]
+Key: VPMAXSDZrmkz:  [ 0.00  0.00 ]
+Key: VPMAXSDZrr:  [ 0.00  0.00 ]
+Key: VPMAXSDZrrk:  [ 0.00  0.00 ]
+Key: VPMAXSDZrrkz:  [ 0.00  0.00 ]
+Key: VPMAXSDrm:  [ 0.00  0.00 ]
+Key: VPMAXSDrr:  [ 0.00  0.00 ]
+Key: VPMAXSQZ:  [ 0.00  0.00 ]
+Key: VPMAXSQZrm:  [ 0.00  0.00 ]
+Key: VPMAXSQZrmb:  [ 0.00  0.00 ]
+Key: VPMAXSQZrmbk:  [ 0.00  0.00 ]
+Key: VPMAXSQZrmbkz:  [ 0.00  0.00 ]
+Key: VPMAXSQZrmk:  [ 0.00  0.00 ]
+Key: VPMAXSQZrmkz:  [ 0.00  0.00 ]
+Key: VPMAXSQZrr:  [ 0.00  0.00 ]
+Key: VPMAXSQZrrk:  [ 0.00  0.00 ]
+Key: VPMAXSQZrrkz:  [ 0.00  0.00 ]
+Key: VPMAXSWYrm:  [ 0.00  0.00 ]
+Key: VPMAXSWYrr:  [ 0.00  0.00 ]
+Key: VPMAXSWZ:  [ 0.00  0.00 ]
+Key: VPMAXSWZrm:  [ 0.00  0.00 ]
+Key: VPMAXSWZrmk:  [ 0.00  0.00 ]
+Key: VPMAXSWZrmkz:  [ 0.00  0.00 ]
+Key: VPMAXSWZrr:  [ 0.00  0.00 ]
+Key: VPMAXSWZrrk:  [ 0.00  0.00 ]
+Key: VPMAXSWZrrkz:  [ 0.00  0.00 ]
+Key: VPMAXSWrm:  [ 0.00  0.00 ]
+Key: VPMAXSWrr:  [ 0.00  0.00 ]
+Key: VPMAXUBYrm:  [ 0.00  0.00 ]
+Key: VPMAXUBYrr:  [ 0.00  0.00 ]
+Key: VPMAXUBZ:  [ 0.00  0.00 ]
+Key: VPMAXUBZrm:  [ 0.00  0.00 ]
+Key: VPMAXUBZrmk:  [ 0.00  0.00 ]
+Key: VPMAXUBZrmkz:  [ 0.00  0.00 ]
+Key: VPMAXUBZrr:  [ 0.00  0.00 ]
+Key: VPMAXUBZrrk:  [ 0.00  0.00 ]
+Key: VPMAXUBZrrkz:  [ 0.00  0.00 ]
+Key: VPMAXUBrm:  [ 0.00  0.00 ]
+Key: VPMAXUBrr:  [ 0.00  0.00 ]
+Key: VPMAXUDYrm:  [ 0.00  0.00 ]
+Key: VPMAXUDYrr:  [ 0.00  0.00 ]
+Key: VPMAXUDZ:  [ 0.00  0.00 ]
+Key: VPMAXUDZrm:  [ 0.00  0.00 ]
+Key: VPMAXUDZrmb:  [ 0.00  0.00 ]
+Key: VPMAXUDZrmbk:  [ 0.00  0.00 ]
+Key: VPMAXUDZrmbkz:  [ 0.00  0.00 ]
+Key: VPMAXUDZrmk:  [ 0.00  0.00 ]
+Key: VPMAXUDZrmkz:  [ 0.00  0.00 ]
+Key: VPMAXUDZrr:  [ 0.00  0.00 ]
+Key: VPMAXUDZrrk:  [ 0.00  0.00 ]
+Key: VPMAXUDZrrkz:  [ 0.00  0.00 ]
+Key: VPMAXUDrm:  [ 0.00  0.00 ]
+Key: VPMAXUDrr:  [ 0.00  0.00 ]
+Key: VPMAXUQZ:  [ 0.00  0.00 ]
+Key: VPMAXUQZrm:  [ 0.00  0.00 ]
+Key: VPMAXUQZrmb:  [ 0.00  0.00 ]
+Key: VPMAXUQZrmbk:  [ 0.00  0.00 ]
+Key: VPMAXUQZrmbkz:  [ 0.00  0.00 ]
+Key: VPMAXUQZrmk:  [ 0.00  0.00 ]
+Key: VPMAXUQZrmkz:  [ 0.00  0.00 ]
+Key: VPMAXUQZrr:  [ 0.00  0.00 ]
+Key: VPMAXUQZrrk:  [ 0.00  0.00 ]
+Key: VPMAXUQZrrkz:  [ 0.00  0.00 ]
+Key: VPMAXUWYrm:  [ 0.00  0.00 ]
+Key: VPMAXUWYrr:  [ 0.00  0.00 ]
+Key: VPMAXUWZ:  [ 0.00  0.00 ]
+Key: VPMAXUWZrm:  [ 0.00  0.00 ]
+Key: VPMAXUWZrmk:  [ 0.00  0.00 ]
+Key: VPMAXUWZrmkz:  [ 0.00  0.00 ]
+Key: VPMAXUWZrr:  [ 0.00  0.00 ]
+Key: VPMAXUWZrrk:  [ 0.00  0.00 ]
+Key: VPMAXUWZrrkz:  [ 0.00  0.00 ]
+Key: VPMAXUWrm:  [ 0.00  0.00 ]
+Key: VPMAXUWrr:  [ 0.00  0.00 ]
+Key: VPMINSBYrm:  [ 0.00  0.00 ]
+Key: VPMINSBYrr:  [ 0.00  0.00 ]
+Key: VPMINSBZ:  [ 0.00  0.00 ]
+Key: VPMINSBZrm:  [ 0.00  0.00 ]
+Key: VPMINSBZrmk:  [ 0.00  0.00 ]
+Key: VPMINSBZrmkz:  [ 0.00  0.00 ]
+Key: VPMINSBZrr:  [ 0.00  0.00 ]
+Key: VPMINSBZrrk:  [ 0.00  0.00 ]
+Key: VPMINSBZrrkz:  [ 0.00  0.00 ]
+Key: VPMINSBrm:  [ 0.00  0.00 ]
+Key: VPMINSBrr:  [ 0.00  0.00 ]
+Key: VPMINSDYrm:  [ 0.00  0.00 ]
+Key: VPMINSDYrr:  [ 0.00  0.00 ]
+Key: VPMINSDZ:  [ 0.00  0.00 ]
+Key: VPMINSDZrm:  [ 0.00  0.00 ]
+Key: VPMINSDZrmb:  [ 0.00  0.00 ]
+Key: VPMINSDZrmbk:  [ 0.00  0.00 ]
+Key: VPMINSDZrmbkz:  [ 0.00  0.00 ]
+Key: VPMINSDZrmk:  [ 0.00  0.00 ]
+Key: VPMINSDZrmkz:  [ 0.00  0.00 ]
+Key: VPMINSDZrr:  [ 0.00  0.00 ]
+Key: VPMINSDZrrk:  [ 0.00  0.00 ]
+Key: VPMINSDZrrkz:  [ 0.00  0.00 ]
+Key: VPMINSDrm:  [ 0.00  0.00 ]
+Key: VPMINSDrr:  [ 0.00  0.00 ]
+Key: VPMINSQZ:  [ 0.00  0.00 ]
+Key: VPMINSQZrm:  [ 0.00  0.00 ]
+Key: VPMINSQZrmb:  [ 0.00  0.00 ]
+Key: VPMINSQZrmbk:  [ 0.00  0.00 ]
+Key: VPMINSQZrmbkz:  [ 0.00  0.00 ]
+Key: VPMINSQZrmk:  [ 0.00  0.00 ]
+Key: VPMINSQZrmkz:  [ 0.00  0.00 ]
+Key: VPMINSQZrr:  [ 0.00  0.00 ]
+Key: VPMINSQZrrk:  [ 0.00  0.00 ]
+Key: VPMINSQZrrkz:  [ 0.00  0.00 ]
+Key: VPMINSWYrm:  [ 0.00  0.00 ]
+Key: VPMINSWYrr:  [ 0.00  0.00 ]
+Key: VPMINSWZ:  [ 0.00  0.00 ]
+Key: VPMINSWZrm:  [ 0.00  0.00 ]
+Key: VPMINSWZrmk:  [ 0.00  0.00 ]
+Key: VPMINSWZrmkz:  [ 0.00  0.00 ]
+Key: VPMINSWZrr:  [ 0.00  0.00 ]
+Key: VPMINSWZrrk:  [ 0.00  0.00 ]
+Key: VPMINSWZrrkz:  [ 0.00  0.00 ]
+Key: VPMINSWrm:  [ 0.00  0.00 ]
+Key: VPMINSWrr:  [ 0.00  0.00 ]
+Key: VPMINUBYrm:  [ 0.00  0.00 ]
+Key: VPMINUBYrr:  [ 0.00  0.00 ]
+Key: VPMINUBZ:  [ 0.00  0.00 ]
+Key: VPMINUBZrm:  [ 0.00  0.00 ]
+Key: VPMINUBZrmk:  [ 0.00  0.00 ]
+Key: VPMINUBZrmkz:  [ 0.00  0.00 ]
+Key: VPMINUBZrr:  [ 0.00  0.00 ]
+Key: VPMINUBZrrk:  [ 0.00  0.00 ]
+Key: VPMINUBZrrkz:  [ 0.00  0.00 ]
+Key: VPMINUBrm:  [ 0.00  0.00 ]
+Key: VPMINUBrr:  [ 0.00  0.00 ]
+Key: VPMINUDYrm:  [ 0.00  0.00 ]
+Key: VPMINUDYrr:  [ 0.00  0.00 ]
+Key: VPMINUDZ:  [ 0.00  0.00 ]
+Key: VPMINUDZrm:  [ 0.00  0.00 ]
+Key: VPMINUDZrmb:  [ 0.00  0.00 ]
+Key: VPMINUDZrmbk:  [ 0.00  0.00 ]
+Key: VPMINUDZrmbkz:  [ 0.00  0.00 ]
+Key: VPMINUDZrmk:  [ 0.00  0.00 ]
+Key: VPMINUDZrmkz:  [ 0.00  0.00 ]
+Key: VPMINUDZrr:  [ 0.00  0.00 ]
+Key: VPMINUDZrrk:  [ 0.00  0.00 ]
+Key: VPMINUDZrrkz:  [ 0.00  0.00 ]
+Key: VPMINUDrm:  [ 0.00  0.00 ]
+Key: VPMINUDrr:  [ 0.00  0.00 ]
+Key: VPMINUQZ:  [ 0.00  0.00 ]
+Key: VPMINUQZrm:  [ 0.00  0.00 ]
+Key: VPMINUQZrmb:  [ 0.00  0.00 ]
+Key: VPMINUQZrmbk:  [ 0.00  0.00 ]
+Key: VPMINUQZrmbkz:  [ 0.00  0.00 ]
+Key: VPMINUQZrmk:  [ 0.00  0.00 ]
+Key: VPMINUQZrmkz:  [ 0.00  0.00 ]
+Key: VPMINUQZrr:  [ 0.00  0.00 ]
+Key: VPMINUQZrrk:  [ 0.00  0.00 ]
+Key: VPMINUQZrrkz:  [ 0.00  0.00 ]
+Key: VPMINUWYrm:  [ 0.00  0.00 ]
+Key: VPMINUWYrr:  [ 0.00  0.00 ]
+Key: VPMINUWZ:  [ 0.00  0.00 ]
+Key: VPMINUWZrm:  [ 0.00  0.00 ]
+Key: VPMINUWZrmk:  [ 0.00  0.00 ]
+Key: VPMINUWZrmkz:  [ 0.00  0.00 ]
+Key: VPMINUWZrr:  [ 0.00  0.00 ]
+Key: VPMINUWZrrk:  [ 0.00  0.00 ]
+Key: VPMINUWZrrkz:  [ 0.00  0.00 ]
+Key: VPMINUWrm:  [ 0.00  0.00 ]
+Key: VPMINUWrr:  [ 0.00  0.00 ]
+Key: VPMOVB:  [ 0.00  0.00 ]
+Key: VPMOVD:  [ 0.00  0.00 ]
+Key: VPMOVDBZ:  [ 0.00  0.00 ]
+Key: VPMOVDBZmr:  [ 0.00  0.00 ]
+Key: VPMOVDBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVDBZrr:  [ 0.00  0.00 ]
+Key: VPMOVDBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVDBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVDWZ:  [ 0.00  0.00 ]
+Key: VPMOVDWZmr:  [ 0.00  0.00 ]
+Key: VPMOVDWZmrk:  [ 0.00  0.00 ]
+Key: VPMOVDWZrr:  [ 0.00  0.00 ]
+Key: VPMOVDWZrrk:  [ 0.00  0.00 ]
+Key: VPMOVDWZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVM:  [ 0.00  0.00 ]
+Key: VPMOVMSKBYrr:  [ 0.00  0.00 ]
+Key: VPMOVMSKBrr:  [ 0.00  0.00 ]
+Key: VPMOVQ:  [ 0.00  0.00 ]
+Key: VPMOVQBZ:  [ 0.00  0.00 ]
+Key: VPMOVQBZmr:  [ 0.00  0.00 ]
+Key: VPMOVQBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVQBZrr:  [ 0.00  0.00 ]
+Key: VPMOVQBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVQBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVQDZ:  [ 0.00  0.00 ]
+Key: VPMOVQDZmr:  [ 0.00  0.00 ]
+Key: VPMOVQDZmrk:  [ 0.00  0.00 ]
+Key: VPMOVQDZrr:  [ 0.00  0.00 ]
+Key: VPMOVQDZrrk:  [ 0.00  0.00 ]
+Key: VPMOVQDZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVQWZ:  [ 0.00  0.00 ]
+Key: VPMOVQWZmr:  [ 0.00  0.00 ]
+Key: VPMOVQWZmrk:  [ 0.00  0.00 ]
+Key: VPMOVQWZrr:  [ 0.00  0.00 ]
+Key: VPMOVQWZrrk:  [ 0.00  0.00 ]
+Key: VPMOVQWZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSDBZ:  [ 0.00  0.00 ]
+Key: VPMOVSDBZmr:  [ 0.00  0.00 ]
+Key: VPMOVSDBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVSDBZrr:  [ 0.00  0.00 ]
+Key: VPMOVSDBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSDBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSDWZ:  [ 0.00  0.00 ]
+Key: VPMOVSDWZmr:  [ 0.00  0.00 ]
+Key: VPMOVSDWZmrk:  [ 0.00  0.00 ]
+Key: VPMOVSDWZrr:  [ 0.00  0.00 ]
+Key: VPMOVSDWZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSDWZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSQBZ:  [ 0.00  0.00 ]
+Key: VPMOVSQBZmr:  [ 0.00  0.00 ]
+Key: VPMOVSQBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVSQBZrr:  [ 0.00  0.00 ]
+Key: VPMOVSQBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSQBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSQDZ:  [ 0.00  0.00 ]
+Key: VPMOVSQDZmr:  [ 0.00  0.00 ]
+Key: VPMOVSQDZmrk:  [ 0.00  0.00 ]
+Key: VPMOVSQDZrr:  [ 0.00  0.00 ]
+Key: VPMOVSQDZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSQDZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSQWZ:  [ 0.00  0.00 ]
+Key: VPMOVSQWZmr:  [ 0.00  0.00 ]
+Key: VPMOVSQWZmrk:  [ 0.00  0.00 ]
+Key: VPMOVSQWZrr:  [ 0.00  0.00 ]
+Key: VPMOVSQWZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSQWZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSWBZ:  [ 0.00  0.00 ]
+Key: VPMOVSWBZmr:  [ 0.00  0.00 ]
+Key: VPMOVSWBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVSWBZrr:  [ 0.00  0.00 ]
+Key: VPMOVSWBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSWBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSXBDYrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBDYrr:  [ 0.00  0.00 ]
+Key: VPMOVSXBDZ:  [ 0.00  0.00 ]
+Key: VPMOVSXBDZrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBDZrmk:  [ 0.00  0.00 ]
+Key: VPMOVSXBDZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVSXBDZrr:  [ 0.00  0.00 ]
+Key: VPMOVSXBDZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSXBDZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSXBDrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBDrr:  [ 0.00  0.00 ]
+Key: VPMOVSXBQYrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBQYrr:  [ 0.00  0.00 ]
+Key: VPMOVSXBQZ:  [ 0.00  0.00 ]
+Key: VPMOVSXBQZrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBQZrmk:  [ 0.00  0.00 ]
+Key: VPMOVSXBQZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVSXBQZrr:  [ 0.00  0.00 ]
+Key: VPMOVSXBQZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSXBQZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSXBQrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBQrr:  [ 0.00  0.00 ]
+Key: VPMOVSXBWYrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBWYrr:  [ 0.00  0.00 ]
+Key: VPMOVSXBWZ:  [ 0.00  0.00 ]
+Key: VPMOVSXBWZrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBWZrmk:  [ 0.00  0.00 ]
+Key: VPMOVSXBWZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVSXBWZrr:  [ 0.00  0.00 ]
+Key: VPMOVSXBWZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSXBWZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSXBWrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBWrr:  [ 0.00  0.00 ]
+Key: VPMOVSXDQYrm:  [ 0.00  0.00 ]
+Key: VPMOVSXDQYrr:  [ 0.00  0.00 ]
+Key: VPMOVSXDQZ:  [ 0.00  0.00 ]
+Key: VPMOVSXDQZrm:  [ 0.00  0.00 ]
+Key: VPMOVSXDQZrmk:  [ 0.00  0.00 ]
+Key: VPMOVSXDQZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVSXDQZrr:  [ 0.00  0.00 ]
+Key: VPMOVSXDQZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSXDQZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSXDQrm:  [ 0.00  0.00 ]
+Key: VPMOVSXDQrr:  [ 0.00  0.00 ]
+Key: VPMOVSXWDYrm:  [ 0.00  0.00 ]
+Key: VPMOVSXWDYrr:  [ 0.00  0.00 ]
+Key: VPMOVSXWDZ:  [ 0.00  0.00 ]
+Key: VPMOVSXWDZrm:  [ 0.00  0.00 ]
+Key: VPMOVSXWDZrmk:  [ 0.00  0.00 ]
+Key: VPMOVSXWDZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVSXWDZrr:  [ 0.00  0.00 ]
+Key: VPMOVSXWDZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSXWDZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSXWDrm:  [ 0.00  0.00 ]
+Key: VPMOVSXWDrr:  [ 0.00  0.00 ]
+Key: VPMOVSXWQYrm:  [ 0.00  0.00 ]
+Key: VPMOVSXWQYrr:  [ 0.00  0.00 ]
+Key: VPMOVSXWQZ:  [ 0.00  0.00 ]
+Key: VPMOVSXWQZrm:  [ 0.00  0.00 ]
+Key: VPMOVSXWQZrmk:  [ 0.00  0.00 ]
+Key: VPMOVSXWQZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVSXWQZrr:  [ 0.00  0.00 ]
+Key: VPMOVSXWQZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSXWQZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSXWQrm:  [ 0.00  0.00 ]
+Key: VPMOVSXWQrr:  [ 0.00  0.00 ]
+Key: VPMOVUSDBZ:  [ 0.00  0.00 ]
+Key: VPMOVUSDBZmr:  [ 0.00  0.00 ]
+Key: VPMOVUSDBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVUSDBZrr:  [ 0.00  0.00 ]
+Key: VPMOVUSDBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVUSDBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVUSDWZ:  [ 0.00  0.00 ]
+Key: VPMOVUSDWZmr:  [ 0.00  0.00 ]
+Key: VPMOVUSDWZmrk:  [ 0.00  0.00 ]
+Key: VPMOVUSDWZrr:  [ 0.00  0.00 ]
+Key: VPMOVUSDWZrrk:  [ 0.00  0.00 ]
+Key: VPMOVUSDWZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVUSQBZ:  [ 0.00  0.00 ]
+Key: VPMOVUSQBZmr:  [ 0.00  0.00 ]
+Key: VPMOVUSQBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVUSQBZrr:  [ 0.00  0.00 ]
+Key: VPMOVUSQBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVUSQBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVUSQDZ:  [ 0.00  0.00 ]
+Key: VPMOVUSQDZmr:  [ 0.00  0.00 ]
+Key: VPMOVUSQDZmrk:  [ 0.00  0.00 ]
+Key: VPMOVUSQDZrr:  [ 0.00  0.00 ]
+Key: VPMOVUSQDZrrk:  [ 0.00  0.00 ]
+Key: VPMOVUSQDZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVUSQWZ:  [ 0.00  0.00 ]
+Key: VPMOVUSQWZmr:  [ 0.00  0.00 ]
+Key: VPMOVUSQWZmrk:  [ 0.00  0.00 ]
+Key: VPMOVUSQWZrr:  [ 0.00  0.00 ]
+Key: VPMOVUSQWZrrk:  [ 0.00  0.00 ]
+Key: VPMOVUSQWZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVUSWBZ:  [ 0.00  0.00 ]
+Key: VPMOVUSWBZmr:  [ 0.00  0.00 ]
+Key: VPMOVUSWBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVUSWBZrr:  [ 0.00  0.00 ]
+Key: VPMOVUSWBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVUSWBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVW:  [ 0.00  0.00 ]
+Key: VPMOVWBZ:  [ 0.00  0.00 ]
+Key: VPMOVWBZmr:  [ 0.00  0.00 ]
+Key: VPMOVWBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVWBZrr:  [ 0.00  0.00 ]
+Key: VPMOVWBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVWBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVZXBDYrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBDYrr:  [ 0.00  0.00 ]
+Key: VPMOVZXBDZ:  [ 0.00  0.00 ]
+Key: VPMOVZXBDZrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBDZrmk:  [ 0.00  0.00 ]
+Key: VPMOVZXBDZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVZXBDZrr:  [ 0.00  0.00 ]
+Key: VPMOVZXBDZrrk:  [ 0.00  0.00 ]
+Key: VPMOVZXBDZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVZXBDrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBDrr:  [ 0.00  0.00 ]
+Key: VPMOVZXBQYrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBQYrr:  [ 0.00  0.00 ]
+Key: VPMOVZXBQZ:  [ 0.00  0.00 ]
+Key: VPMOVZXBQZrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBQZrmk:  [ 0.00  0.00 ]
+Key: VPMOVZXBQZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVZXBQZrr:  [ 0.00  0.00 ]
+Key: VPMOVZXBQZrrk:  [ 0.00  0.00 ]
+Key: VPMOVZXBQZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVZXBQrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBQrr:  [ 0.00  0.00 ]
+Key: VPMOVZXBWYrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBWYrr:  [ 0.00  0.00 ]
+Key: VPMOVZXBWZ:  [ 0.00  0.00 ]
+Key: VPMOVZXBWZrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBWZrmk:  [ 0.00  0.00 ]
+Key: VPMOVZXBWZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVZXBWZrr:  [ 0.00  0.00 ]
+Key: VPMOVZXBWZrrk:  [ 0.00  0.00 ]
+Key: VPMOVZXBWZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVZXBWrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBWrr:  [ 0.00  0.00 ]
+Key: VPMOVZXDQYrm:  [ 0.00  0.00 ]
+Key: VPMOVZXDQYrr:  [ 0.00  0.00 ]
+Key: VPMOVZXDQZ:  [ 0.00  0.00 ]
+Key: VPMOVZXDQZrm:  [ 0.00  0.00 ]
+Key: VPMOVZXDQZrmk:  [ 0.00  0.00 ]
+Key: VPMOVZXDQZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVZXDQZrr:  [ 0.00  0.00 ]
+Key: VPMOVZXDQZrrk:  [ 0.00  0.00 ]
+Key: VPMOVZXDQZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVZXDQrm:  [ 0.00  0.00 ]
+Key: VPMOVZXDQrr:  [ 0.00  0.00 ]
+Key: VPMOVZXWDYrm:  [ 0.00  0.00 ]
+Key: VPMOVZXWDYrr:  [ 0.00  0.00 ]
+Key: VPMOVZXWDZ:  [ 0.00  0.00 ]
+Key: VPMOVZXWDZrm:  [ 0.00  0.00 ]
+Key: VPMOVZXWDZrmk:  [ 0.00  0.00 ]
+Key: VPMOVZXWDZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVZXWDZrr:  [ 0.00  0.00 ]
+Key: VPMOVZXWDZrrk:  [ 0.00  0.00 ]
+Key: VPMOVZXWDZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVZXWDrm:  [ 0.00  0.00 ]
+Key: VPMOVZXWDrr:  [ 0.00  0.00 ]
+Key: VPMOVZXWQYrm:  [ 0.00  0.00 ]
+Key: VPMOVZXWQYrr:  [ 0.00  0.00 ]
+Key: VPMOVZXWQZ:  [ 0.00  0.00 ]
+Key: VPMOVZXWQZrm:  [ 0.00  0.00 ]
+Key: VPMOVZXWQZrmk:  [ 0.00  0.00 ]
+Key: VPMOVZXWQZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVZXWQZrr:  [ 0.00  0.00 ]
+Key: VPMOVZXWQZrrk:  [ 0.00  0.00 ]
+Key: VPMOVZXWQZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVZXWQrm:  [ 0.00  0.00 ]
+Key: VPMOVZXWQrr:  [ 0.00  0.00 ]
+Key: VPMULDQYrm:  [ 0.00  0.00 ]
+Key: VPMULDQYrr:  [ 0.00  0.00 ]
+Key: VPMULDQZ:  [ 0.00  0.00 ]
+Key: VPMULDQZrm:  [ 0.00  0.00 ]
+Key: VPMULDQZrmb:  [ 0.00  0.00 ]
+Key: VPMULDQZrmbk:  [ 0.00  0.00 ]
+Key: VPMULDQZrmbkz:  [ 0.00  0.00 ]
+Key: VPMULDQZrmk:  [ 0.00  0.00 ]
+Key: VPMULDQZrmkz:  [ 0.00  0.00 ]
+Key: VPMULDQZrr:  [ 0.00  0.00 ]
+Key: VPMULDQZrrk:  [ 0.00  0.00 ]
+Key: VPMULDQZrrkz:  [ 0.00  0.00 ]
+Key: VPMULDQrm:  [ 0.00  0.00 ]
+Key: VPMULDQrr:  [ 0.00  0.00 ]
+Key: VPMULHRSWYrm:  [ 0.00  0.00 ]
+Key: VPMULHRSWYrr:  [ 0.00  0.00 ]
+Key: VPMULHRSWZ:  [ 0.00  0.00 ]
+Key: VPMULHRSWZrm:  [ 0.00  0.00 ]
+Key: VPMULHRSWZrmk:  [ 0.00  0.00 ]
+Key: VPMULHRSWZrmkz:  [ 0.00  0.00 ]
+Key: VPMULHRSWZrr:  [ 0.00  0.00 ]
+Key: VPMULHRSWZrrk:  [ 0.00  0.00 ]
+Key: VPMULHRSWZrrkz:  [ 0.00  0.00 ]
+Key: VPMULHRSWrm:  [ 0.00  0.00 ]
+Key: VPMULHRSWrr:  [ 0.00  0.00 ]
+Key: VPMULHUWYrm:  [ 0.00  0.00 ]
+Key: VPMULHUWYrr:  [ 0.00  0.00 ]
+Key: VPMULHUWZ:  [ 0.00  0.00 ]
+Key: VPMULHUWZrm:  [ 0.00  0.00 ]
+Key: VPMULHUWZrmk:  [ 0.00  0.00 ]
+Key: VPMULHUWZrmkz:  [ 0.00  0.00 ]
+Key: VPMULHUWZrr:  [ 0.00  0.00 ]
+Key: VPMULHUWZrrk:  [ 0.00  0.00 ]
+Key: VPMULHUWZrrkz:  [ 0.00  0.00 ]
+Key: VPMULHUWrm:  [ 0.00  0.00 ]
+Key: VPMULHUWrr:  [ 0.00  0.00 ]
+Key: VPMULHWYrm:  [ 0.00  0.00 ]
+Key: VPMULHWYrr:  [ 0.00  0.00 ]
+Key: VPMULHWZ:  [ 0.00  0.00 ]
+Key: VPMULHWZrm:  [ 0.00  0.00 ]
+Key: VPMULHWZrmk:  [ 0.00  0.00 ]
+Key: VPMULHWZrmkz:  [ 0.00  0.00 ]
+Key: VPMULHWZrr:  [ 0.00  0.00 ]
+Key: VPMULHWZrrk:  [ 0.00  0.00 ]
+Key: VPMULHWZrrkz:  [ 0.00  0.00 ]
+Key: VPMULHWrm:  [ 0.00  0.00 ]
+Key: VPMULHWrr:  [ 0.00  0.00 ]
+Key: VPMULLDYrm:  [ 0.00  0.00 ]
+Key: VPMULLDYrr:  [ 0.00  0.00 ]
+Key: VPMULLDZ:  [ 0.00  0.00 ]
+Key: VPMULLDZrm:  [ 0.00  0.00 ]
+Key: VPMULLDZrmb:  [ 0.00  0.00 ]
+Key: VPMULLDZrmbk:  [ 0.00  0.00 ]
+Key: VPMULLDZrmbkz:  [ 0.00  0.00 ]
+Key: VPMULLDZrmk:  [ 0.00  0.00 ]
+Key: VPMULLDZrmkz:  [ 0.00  0.00 ]
+Key: VPMULLDZrr:  [ 0.00  0.00 ]
+Key: VPMULLDZrrk:  [ 0.00  0.00 ]
+Key: VPMULLDZrrkz:  [ 0.00  0.00 ]
+Key: VPMULLDrm:  [ 0.00  0.00 ]
+Key: VPMULLDrr:  [ 0.00  0.00 ]
+Key: VPMULLQZ:  [ 0.00  0.00 ]
+Key: VPMULLQZrm:  [ 0.00  0.00 ]
+Key: VPMULLQZrmb:  [ 0.00  0.00 ]
+Key: VPMULLQZrmbk:  [ 0.00  0.00 ]
+Key: VPMULLQZrmbkz:  [ 0.00  0.00 ]
+Key: VPMULLQZrmk:  [ 0.00  0.00 ]
+Key: VPMULLQZrmkz:  [ 0.00  0.00 ]
+Key: VPMULLQZrr:  [ 0.00  0.00 ]
+Key: VPMULLQZrrk:  [ 0.00  0.00 ]
+Key: VPMULLQZrrkz:  [ 0.00  0.00 ]
+Key: VPMULLWYrm:  [ 0.00  0.00 ]
+Key: VPMULLWYrr:  [ 0.00  0.00 ]
+Key: VPMULLWZ:  [ 0.00  0.00 ]
+Key: VPMULLWZrm:  [ 0.00  0.00 ]
+Key: VPMULLWZrmk:  [ 0.00  0.00 ]
+Key: VPMULLWZrmkz:  [ 0.00  0.00 ]
+Key: VPMULLWZrr:  [ 0.00  0.00 ]
+Key: VPMULLWZrrk:  [ 0.00  0.00 ]
+Key: VPMULLWZrrkz:  [ 0.00  0.00 ]
+Key: VPMULLWrm:  [ 0.00  0.00 ]
+Key: VPMULLWrr:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZ:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrm:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrmb:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrmbk:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrmbkz:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrmk:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrmkz:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrr:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrrk:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrrkz:  [ 0.00  0.00 ]
+Key: VPMULUDQYrm:  [ 0.00  0.00 ]
+Key: VPMULUDQYrr:  [ 0.00  0.00 ]
+Key: VPMULUDQZ:  [ 0.00  0.00 ]
+Key: VPMULUDQZrm:  [ 0.00  0.00 ]
+Key: VPMULUDQZrmb:  [ 0.00  0.00 ]
+Key: VPMULUDQZrmbk:  [ 0.00  0.00 ]
+Key: VPMULUDQZrmbkz:  [ 0.00  0.00 ]
+Key: VPMULUDQZrmk:  [ 0.00  0.00 ]
+Key: VPMULUDQZrmkz:  [ 0.00  0.00 ]
+Key: VPMULUDQZrr:  [ 0.00  0.00 ]
+Key: VPMULUDQZrrk:  [ 0.00  0.00 ]
+Key: VPMULUDQZrrkz:  [ 0.00  0.00 ]
+Key: VPMULUDQrm:  [ 0.00  0.00 ]
+Key: VPMULUDQrr:  [ 0.00  0.00 ]
+Key: VPOPCNTBZ:  [ 0.00  0.00 ]
+Key: VPOPCNTBZrm:  [ 0.00  0.00 ]
+Key: VPOPCNTBZrmk:  [ 0.00  0.00 ]
+Key: VPOPCNTBZrmkz:  [ 0.00  0.00 ]
+Key: VPOPCNTBZrr:  [ 0.00  0.00 ]
+Key: VPOPCNTBZrrk:  [ 0.00  0.00 ]
+Key: VPOPCNTBZrrkz:  [ 0.00  0.00 ]
+Key: VPOPCNTDZ:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrm:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrmb:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrmbk:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrmbkz:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrmk:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrmkz:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrr:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrrk:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrrkz:  [ 0.00  0.00 ]
+Key: VPOPCNTQZ:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrm:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrmb:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrmbk:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrmbkz:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrmk:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrmkz:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrr:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrrk:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrrkz:  [ 0.00  0.00 ]
+Key: VPOPCNTWZ:  [ 0.00  0.00 ]
+Key: VPOPCNTWZrm:  [ 0.00  0.00 ]
+Key: VPOPCNTWZrmk:  [ 0.00  0.00 ]
+Key: VPOPCNTWZrmkz:  [ 0.00  0.00 ]
+Key: VPOPCNTWZrr:  [ 0.00  0.00 ]
+Key: VPOPCNTWZrrk:  [ 0.00  0.00 ]
+Key: VPOPCNTWZrrkz:  [ 0.00  0.00 ]
+Key: VPORDZ:  [ 0.00  0.00 ]
+Key: VPORDZrm:  [ 0.00  0.00 ]
+Key: VPORDZrmb:  [ 0.00  0.00 ]
+Key: VPORDZrmbk:  [ 0.00  0.00 ]
+Key: VPORDZrmbkz:  [ 0.00  0.00 ]
+Key: VPORDZrmk:  [ 0.00  0.00 ]
+Key: VPORDZrmkz:  [ 0.00  0.00 ]
+Key: VPORDZrr:  [ 0.00  0.00 ]
+Key: VPORDZrrk:  [ 0.00  0.00 ]
+Key: VPORDZrrkz:  [ 0.00  0.00 ]
+Key: VPORQZ:  [ 0.00  0.00 ]
+Key: VPORQZrm:  [ 0.00  0.00 ]
+Key: VPORQZrmb:  [ 0.00  0.00 ]
+Key: VPORQZrmbk:  [ 0.00  0.00 ]
+Key: VPORQZrmbkz:  [ 0.00  0.00 ]
+Key: VPORQZrmk:  [ 0.00  0.00 ]
+Key: VPORQZrmkz:  [ 0.00  0.00 ]
+Key: VPORQZrr:  [ 0.00  0.00 ]
+Key: VPORQZrrk:  [ 0.00  0.00 ]
+Key: VPORQZrrkz:  [ 0.00  0.00 ]
+Key: VPORYrm:  [ 0.00  0.00 ]
+Key: VPORYrr:  [ 0.00  0.00 ]
+Key: VPORrm:  [ 0.00  0.00 ]
+Key: VPORrr:  [ 0.00  0.00 ]
+Key: VPPERMrmr:  [ 0.00  0.00 ]
+Key: VPPERMrrm:  [ 0.00  0.00 ]
+Key: VPPERMrrr:  [ 0.00  0.00 ]
+Key: VPPERMrrr_REV:  [ 0.00  0.00 ]
+Key: VPROLDZ:  [ 0.00  0.00 ]
+Key: VPROLDZmbi:  [ 0.00  0.00 ]
+Key: VPROLDZmbik:  [ 0.00  0.00 ]
+Key: VPROLDZmbikz:  [ 0.00  0.00 ]
+Key: VPROLDZmi:  [ 0.00  0.00 ]
+Key: VPROLDZmik:  [ 0.00  0.00 ]
+Key: VPROLDZmikz:  [ 0.00  0.00 ]
+Key: VPROLDZri:  [ 0.00  0.00 ]
+Key: VPROLDZrik:  [ 0.00  0.00 ]
+Key: VPROLDZrikz:  [ 0.00  0.00 ]
+Key: VPROLQZ:  [ 0.00  0.00 ]
+Key: VPROLQZmbi:  [ 0.00  0.00 ]
+Key: VPROLQZmbik:  [ 0.00  0.00 ]
+Key: VPROLQZmbikz:  [ 0.00  0.00 ]
+Key: VPROLQZmi:  [ 0.00  0.00 ]
+Key: VPROLQZmik:  [ 0.00  0.00 ]
+Key: VPROLQZmikz:  [ 0.00  0.00 ]
+Key: VPROLQZri:  [ 0.00  0.00 ]
+Key: VPROLQZrik:  [ 0.00  0.00 ]
+Key: VPROLQZrikz:  [ 0.00  0.00 ]
+Key: VPROLVDZ:  [ 0.00  0.00 ]
+Key: VPROLVDZrm:  [ 0.00  0.00 ]
+Key: VPROLVDZrmb:  [ 0.00  0.00 ]
+Key: VPROLVDZrmbk:  [ 0.00  0.00 ]
+Key: VPROLVDZrmbkz:  [ 0.00  0.00 ]
+Key: VPROLVDZrmk:  [ 0.00  0.00 ]
+Key: VPROLVDZrmkz:  [ 0.00  0.00 ]
+Key: VPROLVDZrr:  [ 0.00  0.00 ]
+Key: VPROLVDZrrk:  [ 0.00  0.00 ]
+Key: VPROLVDZrrkz:  [ 0.00  0.00 ]
+Key: VPROLVQZ:  [ 0.00  0.00 ]
+Key: VPROLVQZrm:  [ 0.00  0.00 ]
+Key: VPROLVQZrmb:  [ 0.00  0.00 ]
+Key: VPROLVQZrmbk:  [ 0.00  0.00 ]
+Key: VPROLVQZrmbkz:  [ 0.00  0.00 ]
+Key: VPROLVQZrmk:  [ 0.00  0.00 ]
+Key: VPROLVQZrmkz:  [ 0.00  0.00 ]
+Key: VPROLVQZrr:  [ 0.00  0.00 ]
+Key: VPROLVQZrrk:  [ 0.00  0.00 ]
+Key: VPROLVQZrrkz:  [ 0.00  0.00 ]
+Key: VPRORDZ:  [ 0.00  0.00 ]
+Key: VPRORDZmbi:  [ 0.00  0.00 ]
+Key: VPRORDZmbik:  [ 0.00  0.00 ]
+Key: VPRORDZmbikz:  [ 0.00  0.00 ]
+Key: VPRORDZmi:  [ 0.00  0.00 ]
+Key: VPRORDZmik:  [ 0.00  0.00 ]
+Key: VPRORDZmikz:  [ 0.00  0.00 ]
+Key: VPRORDZri:  [ 0.00  0.00 ]
+Key: VPRORDZrik:  [ 0.00  0.00 ]
+Key: VPRORDZrikz:  [ 0.00  0.00 ]
+Key: VPRORQZ:  [ 0.00  0.00 ]
+Key: VPRORQZmbi:  [ 0.00  0.00 ]
+Key: VPRORQZmbik:  [ 0.00  0.00 ]
+Key: VPRORQZmbikz:  [ 0.00  0.00 ]
+Key: VPRORQZmi:  [ 0.00  0.00 ]
+Key: VPRORQZmik:  [ 0.00  0.00 ]
+Key: VPRORQZmikz:  [ 0.00  0.00 ]
+Key: VPRORQZri:  [ 0.00  0.00 ]
+Key: VPRORQZrik:  [ 0.00  0.00 ]
+Key: VPRORQZrikz:  [ 0.00  0.00 ]
+Key: VPRORVDZ:  [ 0.00  0.00 ]
+Key: VPRORVDZrm:  [ 0.00  0.00 ]
+Key: VPRORVDZrmb:  [ 0.00  0.00 ]
+Key: VPRORVDZrmbk:  [ 0.00  0.00 ]
+Key: VPRORVDZrmbkz:  [ 0.00  0.00 ]
+Key: VPRORVDZrmk:  [ 0.00  0.00 ]
+Key: VPRORVDZrmkz:  [ 0.00  0.00 ]
+Key: VPRORVDZrr:  [ 0.00  0.00 ]
+Key: VPRORVDZrrk:  [ 0.00  0.00 ]
+Key: VPRORVDZrrkz:  [ 0.00  0.00 ]
+Key: VPRORVQZ:  [ 0.00  0.00 ]
+Key: VPRORVQZrm:  [ 0.00  0.00 ]
+Key: VPRORVQZrmb:  [ 0.00  0.00 ]
+Key: VPRORVQZrmbk:  [ 0.00  0.00 ]
+Key: VPRORVQZrmbkz:  [ 0.00  0.00 ]
+Key: VPRORVQZrmk:  [ 0.00  0.00 ]
+Key: VPRORVQZrmkz:  [ 0.00  0.00 ]
+Key: VPRORVQZrr:  [ 0.00  0.00 ]
+Key: VPRORVQZrrk:  [ 0.00  0.00 ]
+Key: VPRORVQZrrkz:  [ 0.00  0.00 ]
+Key: VPROTBmi:  [ 0.00  0.00 ]
+Key: VPROTBmr:  [ 0.00  0.00 ]
+Key: VPROTBri:  [ 0.00  0.00 ]
+Key: VPROTBrm:  [ 0.00  0.00 ]
+Key: VPROTBrr:  [ 0.00  0.00 ]
+Key: VPROTBrr_REV:  [ 0.00  0.00 ]
+Key: VPROTDmi:  [ 0.00  0.00 ]
+Key: VPROTDmr:  [ 0.00  0.00 ]
+Key: VPROTDri:  [ 0.00  0.00 ]
+Key: VPROTDrm:  [ 0.00  0.00 ]
+Key: VPROTDrr:  [ 0.00  0.00 ]
+Key: VPROTDrr_REV:  [ 0.00  0.00 ]
+Key: VPROTQmi:  [ 0.00  0.00 ]
+Key: VPROTQmr:  [ 0.00  0.00 ]
+Key: VPROTQri:  [ 0.00  0.00 ]
+Key: VPROTQrm:  [ 0.00  0.00 ]
+Key: VPROTQrr:  [ 0.00  0.00 ]
+Key: VPROTQrr_REV:  [ 0.00  0.00 ]
+Key: VPROTWmi:  [ 0.00  0.00 ]
+Key: VPROTWmr:  [ 0.00  0.00 ]
+Key: VPROTWri:  [ 0.00  0.00 ]
+Key: VPROTWrm:  [ 0.00  0.00 ]
+Key: VPROTWrr:  [ 0.00  0.00 ]
+Key: VPROTWrr_REV:  [ 0.00  0.00 ]
+Key: VPSADBWYrm:  [ 0.00  0.00 ]
+Key: VPSADBWYrr:  [ 0.00  0.00 ]
+Key: VPSADBWZ:  [ 0.00  0.00 ]
+Key: VPSADBWZrm:  [ 0.00  0.00 ]
+Key: VPSADBWZrr:  [ 0.00  0.00 ]
+Key: VPSADBWrm:  [ 0.00  0.00 ]
+Key: VPSADBWrr:  [ 0.00  0.00 ]
+Key: VPSCATTERDDZ:  [ 0.00  0.00 ]
+Key: VPSCATTERDDZmr:  [ 0.00  0.00 ]
+Key: VPSCATTERDQZ:  [ 0.00  0.00 ]
+Key: VPSCATTERDQZmr:  [ 0.00  0.00 ]
+Key: VPSCATTERQDZ:  [ 0.00  0.00 ]
+Key: VPSCATTERQDZmr:  [ 0.00  0.00 ]
+Key: VPSCATTERQQZ:  [ 0.00  0.00 ]
+Key: VPSCATTERQQZmr:  [ 0.00  0.00 ]
+Key: VPSHABmr:  [ 0.00  0.00 ]
+Key: VPSHABrm:  [ 0.00  0.00 ]
+Key: VPSHABrr:  [ 0.00  0.00 ]
+Key: VPSHABrr_REV:  [ 0.00  0.00 ]
+Key: VPSHADmr:  [ 0.00  0.00 ]
+Key: VPSHADrm:  [ 0.00  0.00 ]
+Key: VPSHADrr:  [ 0.00  0.00 ]
+Key: VPSHADrr_REV:  [ 0.00  0.00 ]
+Key: VPSHAQmr:  [ 0.00  0.00 ]
+Key: VPSHAQrm:  [ 0.00  0.00 ]
+Key: VPSHAQrr:  [ 0.00  0.00 ]
+Key: VPSHAQrr_REV:  [ 0.00  0.00 ]
+Key: VPSHAWmr:  [ 0.00  0.00 ]
+Key: VPSHAWrm:  [ 0.00  0.00 ]
+Key: VPSHAWrr:  [ 0.00  0.00 ]
+Key: VPSHAWrr_REV:  [ 0.00  0.00 ]
+Key: VPSHLBmr:  [ 0.00  0.00 ]
+Key: VPSHLBrm:  [ 0.00  0.00 ]
+Key: VPSHLBrr:  [ 0.00  0.00 ]
+Key: VPSHLBrr_REV:  [ 0.00  0.00 ]
+Key: VPSHLDDZ:  [ 0.00  0.00 ]
+Key: VPSHLDDZrmbi:  [ 0.00  0.00 ]
+Key: VPSHLDDZrmbik:  [ 0.00  0.00 ]
+Key: VPSHLDDZrmbikz:  [ 0.00  0.00 ]
+Key: VPSHLDDZrmi:  [ 0.00  0.00 ]
+Key: VPSHLDDZrmik:  [ 0.00  0.00 ]
+Key: VPSHLDDZrmikz:  [ 0.00  0.00 ]
+Key: VPSHLDDZrri:  [ 0.00  0.00 ]
+Key: VPSHLDDZrrik:  [ 0.00  0.00 ]
+Key: VPSHLDDZrrikz:  [ 0.00  0.00 ]
+Key: VPSHLDQZ:  [ 0.00  0.00 ]
+Key: VPSHLDQZrmbi:  [ 0.00  0.00 ]
+Key: VPSHLDQZrmbik:  [ 0.00  0.00 ]
+Key: VPSHLDQZrmbikz:  [ 0.00  0.00 ]
+Key: VPSHLDQZrmi:  [ 0.00  0.00 ]
+Key: VPSHLDQZrmik:  [ 0.00  0.00 ]
+Key: VPSHLDQZrmikz:  [ 0.00  0.00 ]
+Key: VPSHLDQZrri:  [ 0.00  0.00 ]
+Key: VPSHLDQZrrik:  [ 0.00  0.00 ]
+Key: VPSHLDQZrrikz:  [ 0.00  0.00 ]
+Key: VPSHLDVDZ:  [ 0.00  0.00 ]
+Key: VPSHLDVDZm:  [ 0.00  0.00 ]
+Key: VPSHLDVDZmb:  [ 0.00  0.00 ]
+Key: VPSHLDVDZmbk:  [ 0.00  0.00 ]
+Key: VPSHLDVDZmbkz:  [ 0.00  0.00 ]
+Key: VPSHLDVDZmk:  [ 0.00  0.00 ]
+Key: VPSHLDVDZmkz:  [ 0.00  0.00 ]
+Key: VPSHLDVDZr:  [ 0.00  0.00 ]
+Key: VPSHLDVDZrk:  [ 0.00  0.00 ]
+Key: VPSHLDVDZrkz:  [ 0.00  0.00 ]
+Key: VPSHLDVQZ:  [ 0.00  0.00 ]
+Key: VPSHLDVQZm:  [ 0.00  0.00 ]
+Key: VPSHLDVQZmb:  [ 0.00  0.00 ]
+Key: VPSHLDVQZmbk:  [ 0.00  0.00 ]
+Key: VPSHLDVQZmbkz:  [ 0.00  0.00 ]
+Key: VPSHLDVQZmk:  [ 0.00  0.00 ]
+Key: VPSHLDVQZmkz:  [ 0.00  0.00 ]
+Key: VPSHLDVQZr:  [ 0.00  0.00 ]
+Key: VPSHLDVQZrk:  [ 0.00  0.00 ]
+Key: VPSHLDVQZrkz:  [ 0.00  0.00 ]
+Key: VPSHLDVWZ:  [ 0.00  0.00 ]
+Key: VPSHLDVWZm:  [ 0.00  0.00 ]
+Key: VPSHLDVWZmk:  [ 0.00  0.00 ]
+Key: VPSHLDVWZmkz:  [ 0.00  0.00 ]
+Key: VPSHLDVWZr:  [ 0.00  0.00 ]
+Key: VPSHLDVWZrk:  [ 0.00  0.00 ]
+Key: VPSHLDVWZrkz:  [ 0.00  0.00 ]
+Key: VPSHLDWZ:  [ 0.00  0.00 ]
+Key: VPSHLDWZrmi:  [ 0.00  0.00 ]
+Key: VPSHLDWZrmik:  [ 0.00  0.00 ]
+Key: VPSHLDWZrmikz:  [ 0.00  0.00 ]
+Key: VPSHLDWZrri:  [ 0.00  0.00 ]
+Key: VPSHLDWZrrik:  [ 0.00  0.00 ]
+Key: VPSHLDWZrrikz:  [ 0.00  0.00 ]
+Key: VPSHLDmr:  [ 0.00  0.00 ]
+Key: VPSHLDrm:  [ 0.00  0.00 ]
+Key: VPSHLDrr:  [ 0.00  0.00 ]
+Key: VPSHLDrr_REV:  [ 0.00  0.00 ]
+Key: VPSHLQmr:  [ 0.00  0.00 ]
+Key: VPSHLQrm:  [ 0.00  0.00 ]
+Key: VPSHLQrr:  [ 0.00  0.00 ]
+Key: VPSHLQrr_REV:  [ 0.00  0.00 ]
+Key: VPSHLWmr:  [ 0.00  0.00 ]
+Key: VPSHLWrm:  [ 0.00  0.00 ]
+Key: VPSHLWrr:  [ 0.00  0.00 ]
+Key: VPSHLWrr_REV:  [ 0.00  0.00 ]
+Key: VPSHRDDZ:  [ 0.00  0.00 ]
+Key: VPSHRDDZrmbi:  [ 0.00  0.00 ]
+Key: VPSHRDDZrmbik:  [ 0.00  0.00 ]
+Key: VPSHRDDZrmbikz:  [ 0.00  0.00 ]
+Key: VPSHRDDZrmi:  [ 0.00  0.00 ]
+Key: VPSHRDDZrmik:  [ 0.00  0.00 ]
+Key: VPSHRDDZrmikz:  [ 0.00  0.00 ]
+Key: VPSHRDDZrri:  [ 0.00  0.00 ]
+Key: VPSHRDDZrrik:  [ 0.00  0.00 ]
+Key: VPSHRDDZrrikz:  [ 0.00  0.00 ]
+Key: VPSHRDQZ:  [ 0.00  0.00 ]
+Key: VPSHRDQZrmbi:  [ 0.00  0.00 ]
+Key: VPSHRDQZrmbik:  [ 0.00  0.00 ]
+Key: VPSHRDQZrmbikz:  [ 0.00  0.00 ]
+Key: VPSHRDQZrmi:  [ 0.00  0.00 ]
+Key: VPSHRDQZrmik:  [ 0.00  0.00 ]
+Key: VPSHRDQZrmikz:  [ 0.00  0.00 ]
+Key: VPSHRDQZrri:  [ 0.00  0.00 ]
+Key: VPSHRDQZrrik:  [ 0.00  0.00 ]
+Key: VPSHRDQZrrikz:  [ 0.00  0.00 ]
+Key: VPSHRDVDZ:  [ 0.00  0.00 ]
+Key: VPSHRDVDZm:  [ 0.00  0.00 ]
+Key: VPSHRDVDZmb:  [ 0.00  0.00 ]
+Key: VPSHRDVDZmbk:  [ 0.00  0.00 ]
+Key: VPSHRDVDZmbkz:  [ 0.00  0.00 ]
+Key: VPSHRDVDZmk:  [ 0.00  0.00 ]
+Key: VPSHRDVDZmkz:  [ 0.00  0.00 ]
+Key: VPSHRDVDZr:  [ 0.00  0.00 ]
+Key: VPSHRDVDZrk:  [ 0.00  0.00 ]
+Key: VPSHRDVDZrkz:  [ 0.00  0.00 ]
+Key: VPSHRDVQZ:  [ 0.00  0.00 ]
+Key: VPSHRDVQZm:  [ 0.00  0.00 ]
+Key: VPSHRDVQZmb:  [ 0.00  0.00 ]
+Key: VPSHRDVQZmbk:  [ 0.00  0.00 ]
+Key: VPSHRDVQZmbkz:  [ 0.00  0.00 ]
+Key: VPSHRDVQZmk:  [ 0.00  0.00 ]
+Key: VPSHRDVQZmkz:  [ 0.00  0.00 ]
+Key: VPSHRDVQZr:  [ 0.00  0.00 ]
+Key: VPSHRDVQZrk:  [ 0.00  0.00 ]
+Key: VPSHRDVQZrkz:  [ 0.00  0.00 ]
+Key: VPSHRDVWZ:  [ 0.00  0.00 ]
+Key: VPSHRDVWZm:  [ 0.00  0.00 ]
+Key: VPSHRDVWZmk:  [ 0.00  0.00 ]
+Key: VPSHRDVWZmkz:  [ 0.00  0.00 ]
+Key: VPSHRDVWZr:  [ 0.00  0.00 ]
+Key: VPSHRDVWZrk:  [ 0.00  0.00 ]
+Key: VPSHRDVWZrkz:  [ 0.00  0.00 ]
+Key: VPSHRDWZ:  [ 0.00  0.00 ]
+Key: VPSHRDWZrmi:  [ 0.00  0.00 ]
+Key: VPSHRDWZrmik:  [ 0.00  0.00 ]
+Key: VPSHRDWZrmikz:  [ 0.00  0.00 ]
+Key: VPSHRDWZrri:  [ 0.00  0.00 ]
+Key: VPSHRDWZrrik:  [ 0.00  0.00 ]
+Key: VPSHRDWZrrikz:  [ 0.00  0.00 ]
+Key: VPSHUFBITQMBZ:  [ 0.00  0.00 ]
+Key: VPSHUFBITQMBZrm:  [ 0.00  0.00 ]
+Key: VPSHUFBITQMBZrmk:  [ 0.00  0.00 ]
+Key: VPSHUFBITQMBZrr:  [ 0.00  0.00 ]
+Key: VPSHUFBITQMBZrrk:  [ 0.00  0.00 ]
+Key: VPSHUFBYrm:  [ 0.00  0.00 ]
+Key: VPSHUFBYrr:  [ 0.00  0.00 ]
+Key: VPSHUFBZ:  [ 0.00  0.00 ]
+Key: VPSHUFBZrm:  [ 0.00  0.00 ]
+Key: VPSHUFBZrmk:  [ 0.00  0.00 ]
+Key: VPSHUFBZrmkz:  [ 0.00  0.00 ]
+Key: VPSHUFBZrr:  [ 0.00  0.00 ]
+Key: VPSHUFBZrrk:  [ 0.00  0.00 ]
+Key: VPSHUFBZrrkz:  [ 0.00  0.00 ]
+Key: VPSHUFBrm:  [ 0.00  0.00 ]
+Key: VPSHUFBrr:  [ 0.00  0.00 ]
+Key: VPSHUFDYmi:  [ 0.00  0.00 ]
+Key: VPSHUFDYri:  [ 0.00  0.00 ]
+Key: VPSHUFDZ:  [ 0.00  0.00 ]
+Key: VPSHUFDZmbi:  [ 0.00  0.00 ]
+Key: VPSHUFDZmbik:  [ 0.00  0.00 ]
+Key: VPSHUFDZmbikz:  [ 0.00  0.00 ]
+Key: VPSHUFDZmi:  [ 0.00  0.00 ]
+Key: VPSHUFDZmik:  [ 0.00  0.00 ]
+Key: VPSHUFDZmikz:  [ 0.00  0.00 ]
+Key: VPSHUFDZri:  [ 0.00  0.00 ]
+Key: VPSHUFDZrik:  [ 0.00  0.00 ]
+Key: VPSHUFDZrikz:  [ 0.00  0.00 ]
+Key: VPSHUFDmi:  [ 0.00  0.00 ]
+Key: VPSHUFDri:  [ 0.00  0.00 ]
+Key: VPSHUFHWYmi:  [ 0.00  0.00 ]
+Key: VPSHUFHWYri:  [ 0.00  0.00 ]
+Key: VPSHUFHWZ:  [ 0.00  0.00 ]
+Key: VPSHUFHWZmi:  [ 0.00  0.00 ]
+Key: VPSHUFHWZmik:  [ 0.00  0.00 ]
+Key: VPSHUFHWZmikz:  [ 0.00  0.00 ]
+Key: VPSHUFHWZri:  [ 0.00  0.00 ]
+Key: VPSHUFHWZrik:  [ 0.00  0.00 ]
+Key: VPSHUFHWZrikz:  [ 0.00  0.00 ]
+Key: VPSHUFHWmi:  [ 0.00  0.00 ]
+Key: VPSHUFHWri:  [ 0.00  0.00 ]
+Key: VPSHUFLWYmi:  [ 0.00  0.00 ]
+Key: VPSHUFLWYri:  [ 0.00  0.00 ]
+Key: VPSHUFLWZ:  [ 0.00  0.00 ]
+Key: VPSHUFLWZmi:  [ 0.00  0.00 ]
+Key: VPSHUFLWZmik:  [ 0.00  0.00 ]
+Key: VPSHUFLWZmikz:  [ 0.00  0.00 ]
+Key: VPSHUFLWZri:  [ 0.00  0.00 ]
+Key: VPSHUFLWZrik:  [ 0.00  0.00 ]
+Key: VPSHUFLWZrikz:  [ 0.00  0.00 ]
+Key: VPSHUFLWmi:  [ 0.00  0.00 ]
+Key: VPSHUFLWri:  [ 0.00  0.00 ]
+Key: VPSIGNBYrm:  [ 0.00  0.00 ]
+Key: VPSIGNBYrr:  [ 0.00  0.00 ]
+Key: VPSIGNBrm:  [ 0.00  0.00 ]
+Key: VPSIGNBrr:  [ 0.00  0.00 ]
+Key: VPSIGNDYrm:  [ 0.00  0.00 ]
+Key: VPSIGNDYrr:  [ 0.00  0.00 ]
+Key: VPSIGNDrm:  [ 0.00  0.00 ]
+Key: VPSIGNDrr:  [ 0.00  0.00 ]
+Key: VPSIGNWYrm:  [ 0.00  0.00 ]
+Key: VPSIGNWYrr:  [ 0.00  0.00 ]
+Key: VPSIGNWrm:  [ 0.00  0.00 ]
+Key: VPSIGNWrr:  [ 0.00  0.00 ]
+Key: VPSLLDQYri:  [ 0.00  0.00 ]
+Key: VPSLLDQZ:  [ 0.00  0.00 ]
+Key: VPSLLDQZmi:  [ 0.00  0.00 ]
+Key: VPSLLDQZri:  [ 0.00  0.00 ]
+Key: VPSLLDQri:  [ 0.00  0.00 ]
+Key: VPSLLDYri:  [ 0.00  0.00 ]
+Key: VPSLLDYrm:  [ 0.00  0.00 ]
+Key: VPSLLDYrr:  [ 0.00  0.00 ]
+Key: VPSLLDZ:  [ 0.00  0.00 ]
+Key: VPSLLDZmbi:  [ 0.00  0.00 ]
+Key: VPSLLDZmbik:  [ 0.00  0.00 ]
+Key: VPSLLDZmbikz:  [ 0.00  0.00 ]
+Key: VPSLLDZmi:  [ 0.00  0.00 ]
+Key: VPSLLDZmik:  [ 0.00  0.00 ]
+Key: VPSLLDZmikz:  [ 0.00  0.00 ]
+Key: VPSLLDZri:  [ 0.00  0.00 ]
+Key: VPSLLDZrik:  [ 0.00  0.00 ]
+Key: VPSLLDZrikz:  [ 0.00  0.00 ]
+Key: VPSLLDZrm:  [ 0.00  0.00 ]
+Key: VPSLLDZrmk:  [ 0.00  0.00 ]
+Key: VPSLLDZrmkz:  [ 0.00  0.00 ]
+Key: VPSLLDZrr:  [ 0.00  0.00 ]
+Key: VPSLLDZrrk:  [ 0.00  0.00 ]
+Key: VPSLLDZrrkz:  [ 0.00  0.00 ]
+Key: VPSLLDri:  [ 0.00  0.00 ]
+Key: VPSLLDrm:  [ 0.00  0.00 ]
+Key: VPSLLDrr:  [ 0.00  0.00 ]
+Key: VPSLLQYri:  [ 0.00  0.00 ]
+Key: VPSLLQYrm:  [ 0.00  0.00 ]
+Key: VPSLLQYrr:  [ 0.00  0.00 ]
+Key: VPSLLQZ:  [ 0.00  0.00 ]
+Key: VPSLLQZmbi:  [ 0.00  0.00 ]
+Key: VPSLLQZmbik:  [ 0.00  0.00 ]
+Key: VPSLLQZmbikz:  [ 0.00  0.00 ]
+Key: VPSLLQZmi:  [ 0.00  0.00 ]
+Key: VPSLLQZmik:  [ 0.00  0.00 ]
+Key: VPSLLQZmikz:  [ 0.00  0.00 ]
+Key: VPSLLQZri:  [ 0.00  0.00 ]
+Key: VPSLLQZrik:  [ 0.00  0.00 ]
+Key: VPSLLQZrikz:  [ 0.00  0.00 ]
+Key: VPSLLQZrm:  [ 0.00  0.00 ]
+Key: VPSLLQZrmk:  [ 0.00  0.00 ]
+Key: VPSLLQZrmkz:  [ 0.00  0.00 ]
+Key: VPSLLQZrr:  [ 0.00  0.00 ]
+Key: VPSLLQZrrk:  [ 0.00  0.00 ]
+Key: VPSLLQZrrkz:  [ 0.00  0.00 ]
+Key: VPSLLQri:  [ 0.00  0.00 ]
+Key: VPSLLQrm:  [ 0.00  0.00 ]
+Key: VPSLLQrr:  [ 0.00  0.00 ]
+Key: VPSLLVDYrm:  [ 0.00  0.00 ]
+Key: VPSLLVDYrr:  [ 0.00  0.00 ]
+Key: VPSLLVDZ:  [ 0.00  0.00 ]
+Key: VPSLLVDZrm:  [ 0.00  0.00 ]
+Key: VPSLLVDZrmb:  [ 0.00  0.00 ]
+Key: VPSLLVDZrmbk:  [ 0.00  0.00 ]
+Key: VPSLLVDZrmbkz:  [ 0.00  0.00 ]
+Key: VPSLLVDZrmk:  [ 0.00  0.00 ]
+Key: VPSLLVDZrmkz:  [ 0.00  0.00 ]
+Key: VPSLLVDZrr:  [ 0.00  0.00 ]
+Key: VPSLLVDZrrk:  [ 0.00  0.00 ]
+Key: VPSLLVDZrrkz:  [ 0.00  0.00 ]
+Key: VPSLLVDrm:  [ 0.00  0.00 ]
+Key: VPSLLVDrr:  [ 0.00  0.00 ]
+Key: VPSLLVQYrm:  [ 0.00  0.00 ]
+Key: VPSLLVQYrr:  [ 0.00  0.00 ]
+Key: VPSLLVQZ:  [ 0.00  0.00 ]
+Key: VPSLLVQZrm:  [ 0.00  0.00 ]
+Key: VPSLLVQZrmb:  [ 0.00  0.00 ]
+Key: VPSLLVQZrmbk:  [ 0.00  0.00 ]
+Key: VPSLLVQZrmbkz:  [ 0.00  0.00 ]
+Key: VPSLLVQZrmk:  [ 0.00  0.00 ]
+Key: VPSLLVQZrmkz:  [ 0.00  0.00 ]
+Key: VPSLLVQZrr:  [ 0.00  0.00 ]
+Key: VPSLLVQZrrk:  [ 0.00  0.00 ]
+Key: VPSLLVQZrrkz:  [ 0.00  0.00 ]
+Key: VPSLLVQrm:  [ 0.00  0.00 ]
+Key: VPSLLVQrr:  [ 0.00  0.00 ]
+Key: VPSLLVWZ:  [ 0.00  0.00 ]
+Key: VPSLLVWZrm:  [ 0.00  0.00 ]
+Key: VPSLLVWZrmk:  [ 0.00  0.00 ]
+Key: VPSLLVWZrmkz:  [ 0.00  0.00 ]
+Key: VPSLLVWZrr:  [ 0.00  0.00 ]
+Key: VPSLLVWZrrk:  [ 0.00  0.00 ]
+Key: VPSLLVWZrrkz:  [ 0.00  0.00 ]
+Key: VPSLLWYri:  [ 0.00  0.00 ]
+Key: VPSLLWYrm:  [ 0.00  0.00 ]
+Key: VPSLLWYrr:  [ 0.00  0.00 ]
+Key: VPSLLWZ:  [ 0.00  0.00 ]
+Key: VPSLLWZmi:  [ 0.00  0.00 ]
+Key: VPSLLWZmik:  [ 0.00  0.00 ]
+Key: VPSLLWZmikz:  [ 0.00  0.00 ]
+Key: VPSLLWZri:  [ 0.00  0.00 ]
+Key: VPSLLWZrik:  [ 0.00  0.00 ]
+Key: VPSLLWZrikz:  [ 0.00  0.00 ]
+Key: VPSLLWZrm:  [ 0.00  0.00 ]
+Key: VPSLLWZrmk:  [ 0.00  0.00 ]
+Key: VPSLLWZrmkz:  [ 0.00  0.00 ]
+Key: VPSLLWZrr:  [ 0.00  0.00 ]
+Key: VPSLLWZrrk:  [ 0.00  0.00 ]
+Key: VPSLLWZrrkz:  [ 0.00  0.00 ]
+Key: VPSLLWri:  [ 0.00  0.00 ]
+Key: VPSLLWrm:  [ 0.00  0.00 ]
+Key: VPSLLWrr:  [ 0.00  0.00 ]
+Key: VPSRADYri:  [ 0.00  0.00 ]
+Key: VPSRADYrm:  [ 0.00  0.00 ]
+Key: VPSRADYrr:  [ 0.00  0.00 ]
+Key: VPSRADZ:  [ 0.00  0.00 ]
+Key: VPSRADZmbi:  [ 0.00  0.00 ]
+Key: VPSRADZmbik:  [ 0.00  0.00 ]
+Key: VPSRADZmbikz:  [ 0.00  0.00 ]
+Key: VPSRADZmi:  [ 0.00  0.00 ]
+Key: VPSRADZmik:  [ 0.00  0.00 ]
+Key: VPSRADZmikz:  [ 0.00  0.00 ]
+Key: VPSRADZri:  [ 0.00  0.00 ]
+Key: VPSRADZrik:  [ 0.00  0.00 ]
+Key: VPSRADZrikz:  [ 0.00  0.00 ]
+Key: VPSRADZrm:  [ 0.00  0.00 ]
+Key: VPSRADZrmk:  [ 0.00  0.00 ]
+Key: VPSRADZrmkz:  [ 0.00  0.00 ]
+Key: VPSRADZrr:  [ 0.00  0.00 ]
+Key: VPSRADZrrk:  [ 0.00  0.00 ]
+Key: VPSRADZrrkz:  [ 0.00  0.00 ]
+Key: VPSRADri:  [ 0.00  0.00 ]
+Key: VPSRADrm:  [ 0.00  0.00 ]
+Key: VPSRADrr:  [ 0.00  0.00 ]
+Key: VPSRAQZ:  [ 0.00  0.00 ]
+Key: VPSRAQZmbi:  [ 0.00  0.00 ]
+Key: VPSRAQZmbik:  [ 0.00  0.00 ]
+Key: VPSRAQZmbikz:  [ 0.00  0.00 ]
+Key: VPSRAQZmi:  [ 0.00  0.00 ]
+Key: VPSRAQZmik:  [ 0.00  0.00 ]
+Key: VPSRAQZmikz:  [ 0.00  0.00 ]
+Key: VPSRAQZri:  [ 0.00  0.00 ]
+Key: VPSRAQZrik:  [ 0.00  0.00 ]
+Key: VPSRAQZrikz:  [ 0.00  0.00 ]
+Key: VPSRAQZrm:  [ 0.00  0.00 ]
+Key: VPSRAQZrmk:  [ 0.00  0.00 ]
+Key: VPSRAQZrmkz:  [ 0.00  0.00 ]
+Key: VPSRAQZrr:  [ 0.00  0.00 ]
+Key: VPSRAQZrrk:  [ 0.00  0.00 ]
+Key: VPSRAQZrrkz:  [ 0.00  0.00 ]
+Key: VPSRAVDYrm:  [ 0.00  0.00 ]
+Key: VPSRAVDYrr:  [ 0.00  0.00 ]
+Key: VPSRAVDZ:  [ 0.00  0.00 ]
+Key: VPSRAVDZrm:  [ 0.00  0.00 ]
+Key: VPSRAVDZrmb:  [ 0.00  0.00 ]
+Key: VPSRAVDZrmbk:  [ 0.00  0.00 ]
+Key: VPSRAVDZrmbkz:  [ 0.00  0.00 ]
+Key: VPSRAVDZrmk:  [ 0.00  0.00 ]
+Key: VPSRAVDZrmkz:  [ 0.00  0.00 ]
+Key: VPSRAVDZrr:  [ 0.00  0.00 ]
+Key: VPSRAVDZrrk:  [ 0.00  0.00 ]
+Key: VPSRAVDZrrkz:  [ 0.00  0.00 ]
+Key: VPSRAVDrm:  [ 0.00  0.00 ]
+Key: VPSRAVDrr:  [ 0.00  0.00 ]
+Key: VPSRAVQZ:  [ 0.00  0.00 ]
+Key: VPSRAVQZrm:  [ 0.00  0.00 ]
+Key: VPSRAVQZrmb:  [ 0.00  0.00 ]
+Key: VPSRAVQZrmbk:  [ 0.00  0.00 ]
+Key: VPSRAVQZrmbkz:  [ 0.00  0.00 ]
+Key: VPSRAVQZrmk:  [ 0.00  0.00 ]
+Key: VPSRAVQZrmkz:  [ 0.00  0.00 ]
+Key: VPSRAVQZrr:  [ 0.00  0.00 ]
+Key: VPSRAVQZrrk:  [ 0.00  0.00 ]
+Key: VPSRAVQZrrkz:  [ 0.00  0.00 ]
+Key: VPSRAVWZ:  [ 0.00  0.00 ]
+Key: VPSRAVWZrm:  [ 0.00  0.00 ]
+Key: VPSRAVWZrmk:  [ 0.00  0.00 ]
+Key: VPSRAVWZrmkz:  [ 0.00  0.00 ]
+Key: VPSRAVWZrr:  [ 0.00  0.00 ]
+Key: VPSRAVWZrrk:  [ 0.00  0.00 ]
+Key: VPSRAVWZrrkz:  [ 0.00  0.00 ]
+Key: VPSRAWYri:  [ 0.00  0.00 ]
+Key: VPSRAWYrm:  [ 0.00  0.00 ]
+Key: VPSRAWYrr:  [ 0.00  0.00 ]
+Key: VPSRAWZ:  [ 0.00  0.00 ]
+Key: VPSRAWZmi:  [ 0.00  0.00 ]
+Key: VPSRAWZmik:  [ 0.00  0.00 ]
+Key: VPSRAWZmikz:  [ 0.00  0.00 ]
+Key: VPSRAWZri:  [ 0.00  0.00 ]
+Key: VPSRAWZrik:  [ 0.00  0.00 ]
+Key: VPSRAWZrikz:  [ 0.00  0.00 ]
+Key: VPSRAWZrm:  [ 0.00  0.00 ]
+Key: VPSRAWZrmk:  [ 0.00  0.00 ]
+Key: VPSRAWZrmkz:  [ 0.00  0.00 ]
+Key: VPSRAWZrr:  [ 0.00  0.00 ]
+Key: VPSRAWZrrk:  [ 0.00  0.00 ]
+Key: VPSRAWZrrkz:  [ 0.00  0.00 ]
+Key: VPSRAWri:  [ 0.00  0.00 ]
+Key: VPSRAWrm:  [ 0.00  0.00 ]
+Key: VPSRAWrr:  [ 0.00  0.00 ]
+Key: VPSRLDQYri:  [ 0.00  0.00 ]
+Key: VPSRLDQZ:  [ 0.00  0.00 ]
+Key: VPSRLDQZmi:  [ 0.00  0.00 ]
+Key: VPSRLDQZri:  [ 0.00  0.00 ]
+Key: VPSRLDQri:  [ 0.00  0.00 ]
+Key: VPSRLDYri:  [ 0.00  0.00 ]
+Key: VPSRLDYrm:  [ 0.00  0.00 ]
+Key: VPSRLDYrr:  [ 0.00  0.00 ]
+Key: VPSRLDZ:  [ 0.00  0.00 ]
+Key: VPSRLDZmbi:  [ 0.00  0.00 ]
+Key: VPSRLDZmbik:  [ 0.00  0.00 ]
+Key: VPSRLDZmbikz:  [ 0.00  0.00 ]
+Key: VPSRLDZmi:  [ 0.00  0.00 ]
+Key: VPSRLDZmik:  [ 0.00  0.00 ]
+Key: VPSRLDZmikz:  [ 0.00  0.00 ]
+Key: VPSRLDZri:  [ 0.00  0.00 ]
+Key: VPSRLDZrik:  [ 0.00  0.00 ]
+Key: VPSRLDZrikz:  [ 0.00  0.00 ]
+Key: VPSRLDZrm:  [ 0.00  0.00 ]
+Key: VPSRLDZrmk:  [ 0.00  0.00 ]
+Key: VPSRLDZrmkz:  [ 0.00  0.00 ]
+Key: VPSRLDZrr:  [ 0.00  0.00 ]
+Key: VPSRLDZrrk:  [ 0.00  0.00 ]
+Key: VPSRLDZrrkz:  [ 0.00  0.00 ]
+Key: VPSRLDri:  [ 0.00  0.00 ]
+Key: VPSRLDrm:  [ 0.00  0.00 ]
+Key: VPSRLDrr:  [ 0.00  0.00 ]
+Key: VPSRLQYri:  [ 0.00  0.00 ]
+Key: VPSRLQYrm:  [ 0.00  0.00 ]
+Key: VPSRLQYrr:  [ 0.00  0.00 ]
+Key: VPSRLQZ:  [ 0.00  0.00 ]
+Key: VPSRLQZmbi:  [ 0.00  0.00 ]
+Key: VPSRLQZmbik:  [ 0.00  0.00 ]
+Key: VPSRLQZmbikz:  [ 0.00  0.00 ]
+Key: VPSRLQZmi:  [ 0.00  0.00 ]
+Key: VPSRLQZmik:  [ 0.00  0.00 ]
+Key: VPSRLQZmikz:  [ 0.00  0.00 ]
+Key: VPSRLQZri:  [ 0.00  0.00 ]
+Key: VPSRLQZrik:  [ 0.00  0.00 ]
+Key: VPSRLQZrikz:  [ 0.00  0.00 ]
+Key: VPSRLQZrm:  [ 0.00  0.00 ]
+Key: VPSRLQZrmk:  [ 0.00  0.00 ]
+Key: VPSRLQZrmkz:  [ 0.00  0.00 ]
+Key: VPSRLQZrr:  [ 0.00  0.00 ]
+Key: VPSRLQZrrk:  [ 0.00  0.00 ]
+Key: VPSRLQZrrkz:  [ 0.00  0.00 ]
+Key: VPSRLQri:  [ 0.00  0.00 ]
+Key: VPSRLQrm:  [ 0.00  0.00 ]
+Key: VPSRLQrr:  [ 0.00  0.00 ]
+Key: VPSRLVDYrm:  [ 0.00  0.00 ]
+Key: VPSRLVDYrr:  [ 0.00  0.00 ]
+Key: VPSRLVDZ:  [ 0.00  0.00 ]
+Key: VPSRLVDZrm:  [ 0.00  0.00 ]
+Key: VPSRLVDZrmb:  [ 0.00  0.00 ]
+Key: VPSRLVDZrmbk:  [ 0.00  0.00 ]
+Key: VPSRLVDZrmbkz:  [ 0.00  0.00 ]
+Key: VPSRLVDZrmk:  [ 0.00  0.00 ]
+Key: VPSRLVDZrmkz:  [ 0.00  0.00 ]
+Key: VPSRLVDZrr:  [ 0.00  0.00 ]
+Key: VPSRLVDZrrk:  [ 0.00  0.00 ]
+Key: VPSRLVDZrrkz:  [ 0.00  0.00 ]
+Key: VPSRLVDrm:  [ 0.00  0.00 ]
+Key: VPSRLVDrr:  [ 0.00  0.00 ]
+Key: VPSRLVQYrm:  [ 0.00  0.00 ]
+Key: VPSRLVQYrr:  [ 0.00  0.00 ]
+Key: VPSRLVQZ:  [ 0.00  0.00 ]
+Key: VPSRLVQZrm:  [ 0.00  0.00 ]
+Key: VPSRLVQZrmb:  [ 0.00  0.00 ]
+Key: VPSRLVQZrmbk:  [ 0.00  0.00 ]
+Key: VPSRLVQZrmbkz:  [ 0.00  0.00 ]
+Key: VPSRLVQZrmk:  [ 0.00  0.00 ]
+Key: VPSRLVQZrmkz:  [ 0.00  0.00 ]
+Key: VPSRLVQZrr:  [ 0.00  0.00 ]
+Key: VPSRLVQZrrk:  [ 0.00  0.00 ]
+Key: VPSRLVQZrrkz:  [ 0.00  0.00 ]
+Key: VPSRLVQrm:  [ 0.00  0.00 ]
+Key: VPSRLVQrr:  [ 0.00  0.00 ]
+Key: VPSRLVWZ:  [ 0.00  0.00 ]
+Key: VPSRLVWZrm:  [ 0.00  0.00 ]
+Key: VPSRLVWZrmk:  [ 0.00  0.00 ]
+Key: VPSRLVWZrmkz:  [ 0.00  0.00 ]
+Key: VPSRLVWZrr:  [ 0.00  0.00 ]
+Key: VPSRLVWZrrk:  [ 0.00  0.00 ]
+Key: VPSRLVWZrrkz:  [ 0.00  0.00 ]
+Key: VPSRLWYri:  [ 0.00  0.00 ]
+Key: VPSRLWYrm:  [ 0.00  0.00 ]
+Key: VPSRLWYrr:  [ 0.00  0.00 ]
+Key: VPSRLWZ:  [ 0.00  0.00 ]
+Key: VPSRLWZmi:  [ 0.00  0.00 ]
+Key: VPSRLWZmik:  [ 0.00  0.00 ]
+Key: VPSRLWZmikz:  [ 0.00  0.00 ]
+Key: VPSRLWZri:  [ 0.00  0.00 ]
+Key: VPSRLWZrik:  [ 0.00  0.00 ]
+Key: VPSRLWZrikz:  [ 0.00  0.00 ]
+Key: VPSRLWZrm:  [ 0.00  0.00 ]
+Key: VPSRLWZrmk:  [ 0.00  0.00 ]
+Key: VPSRLWZrmkz:  [ 0.00  0.00 ]
+Key: VPSRLWZrr:  [ 0.00  0.00 ]
+Key: VPSRLWZrrk:  [ 0.00  0.00 ]
+Key: VPSRLWZrrkz:  [ 0.00  0.00 ]
+Key: VPSRLWri:  [ 0.00  0.00 ]
+Key: VPSRLWrm:  [ 0.00  0.00 ]
+Key: VPSRLWrr:  [ 0.00  0.00 ]
+Key: VPSUBBYrm:  [ 0.00  0.00 ]
+Key: VPSUBBYrr:  [ 0.00  0.00 ]
+Key: VPSUBBZ:  [ 0.00  0.00 ]
+Key: VPSUBBZrm:  [ 0.00  0.00 ]
+Key: VPSUBBZrmk:  [ 0.00  0.00 ]
+Key: VPSUBBZrmkz:  [ 0.00  0.00 ]
+Key: VPSUBBZrr:  [ 0.00  0.00 ]
+Key: VPSUBBZrrk:  [ 0.00  0.00 ]
+Key: VPSUBBZrrkz:  [ 0.00  0.00 ]
+Key: VPSUBBrm:  [ 0.00  0.00 ]
+Key: VPSUBBrr:  [ 0.00  0.00 ]
+Key: VPSUBDYrm:  [ 0.00  0.00 ]
+Key: VPSUBDYrr:  [ 0.00  0.00 ]
+Key: VPSUBDZ:  [ 0.00  0.00 ]
+Key: VPSUBDZrm:  [ 0.00  0.00 ]
+Key: VPSUBDZrmb:  [ 0.00  0.00 ]
+Key: VPSUBDZrmbk:  [ 0.00  0.00 ]
+Key: VPSUBDZrmbkz:  [ 0.00  0.00 ]
+Key: VPSUBDZrmk:  [ 0.00  0.00 ]
+Key: VPSUBDZrmkz:  [ 0.00  0.00 ]
+Key: VPSUBDZrr:  [ 0.00  0.00 ]
+Key: VPSUBDZrrk:  [ 0.00  0.00 ]
+Key: VPSUBDZrrkz:  [ 0.00  0.00 ]
+Key: VPSUBDrm:  [ 0.00  0.00 ]
+Key: VPSUBDrr:  [ 0.00  0.00 ]
+Key: VPSUBQYrm:  [ 0.00  0.00 ]
+Key: VPSUBQYrr:  [ 0.00  0.00 ]
+Key: VPSUBQZ:  [ 0.00  0.00 ]
+Key: VPSUBQZrm:  [ 0.00  0.00 ]
+Key: VPSUBQZrmb:  [ 0.00  0.00 ]
+Key: VPSUBQZrmbk:  [ 0.00  0.00 ]
+Key: VPSUBQZrmbkz:  [ 0.00  0.00 ]
+Key: VPSUBQZrmk:  [ 0.00  0.00 ]
+Key: VPSUBQZrmkz:  [ 0.00  0.00 ]
+Key: VPSUBQZrr:  [ 0.00  0.00 ]
+Key: VPSUBQZrrk:  [ 0.00  0.00 ]
+Key: VPSUBQZrrkz:  [ 0.00  0.00 ]
+Key: VPSUBQrm:  [ 0.00  0.00 ]
+Key: VPSUBQrr:  [ 0.00  0.00 ]
+Key: VPSUBSBYrm:  [ 0.00  0.00 ]
+Key: VPSUBSBYrr:  [ 0.00  0.00 ]
+Key: VPSUBSBZ:  [ 0.00  0.00 ]
+Key: VPSUBSBZrm:  [ 0.00  0.00 ]
+Key: VPSUBSBZrmk:  [ 0.00  0.00 ]
+Key: VPSUBSBZrmkz:  [ 0.00  0.00 ]
+Key: VPSUBSBZrr:  [ 0.00  0.00 ]
+Key: VPSUBSBZrrk:  [ 0.00  0.00 ]
+Key: VPSUBSBZrrkz:  [ 0.00  0.00 ]
+Key: VPSUBSBrm:  [ 0.00  0.00 ]
+Key: VPSUBSBrr:  [ 0.00  0.00 ]
+Key: VPSUBSWYrm:  [ 0.00  0.00 ]
+Key: VPSUBSWYrr:  [ 0.00  0.00 ]
+Key: VPSUBSWZ:  [ 0.00  0.00 ]
+Key: VPSUBSWZrm:  [ 0.00  0.00 ]
+Key: VPSUBSWZrmk:  [ 0.00  0.00 ]
+Key: VPSUBSWZrmkz:  [ 0.00  0.00 ]
+Key: VPSUBSWZrr:  [ 0.00  0.00 ]
+Key: VPSUBSWZrrk:  [ 0.00  0.00 ]
+Key: VPSUBSWZrrkz:  [ 0.00  0.00 ]
+Key: VPSUBSWrm:  [ 0.00  0.00 ]
+Key: VPSUBSWrr:  [ 0.00  0.00 ]
+Key: VPSUBUSBYrm:  [ 0.00  0.00 ]
+Key: VPSUBUSBYrr:  [ 0.00  0.00 ]
+Key: VPSUBUSBZ:  [ 0.00  0.00 ]
+Key: VPSUBUSBZrm:  [ 0.00  0.00 ]
+Key: VPSUBUSBZrmk:  [ 0.00  0.00 ]
+Key: VPSUBUSBZrmkz:  [ 0.00  0.00 ]
+Key: VPSUBUSBZrr:  [ 0.00  0.00 ]
+Key: VPSUBUSBZrrk:  [ 0.00  0.00 ]
+Key: VPSUBUSBZrrkz:  [ 0.00  0.00 ]
+Key: VPSUBUSBrm:  [ 0.00  0.00 ]
+Key: VPSUBUSBrr:  [ 0.00  0.00 ]
+Key: VPSUBUSWYrm:  [ 0.00  0.00 ]
+Key: VPSUBUSWYrr:  [ 0.00  0.00 ]
+Key: VPSUBUSWZ:  [ 0.00  0.00 ]
+Key: VPSUBUSWZrm:  [ 0.00  0.00 ]
+Key: VPSUBUSWZrmk:  [ 0.00  0.00 ]
+Key: VPSUBUSWZrmkz:  [ 0.00  0.00 ]
+Key: VPSUBUSWZrr:  [ 0.00  0.00 ]
+Key: VPSUBUSWZrrk:  [ 0.00  0.00 ]
+Key: VPSUBUSWZrrkz:  [ 0.00  0.00 ]
+Key: VPSUBUSWrm:  [ 0.00  0.00 ]
+Key: VPSUBUSWrr:  [ 0.00  0.00 ]
+Key: VPSUBWYrm:  [ 0.00  0.00 ]
+Key: VPSUBWYrr:  [ 0.00  0.00 ]
+Key: VPSUBWZ:  [ 0.00  0.00 ]
+Key: VPSUBWZrm:  [ 0.00  0.00 ]
+Key: VPSUBWZrmk:  [ 0.00  0.00 ]
+Key: VPSUBWZrmkz:  [ 0.00  0.00 ]
+Key: VPSUBWZrr:  [ 0.00  0.00 ]
+Key: VPSUBWZrrk:  [ 0.00  0.00 ]
+Key: VPSUBWZrrkz:  [ 0.00  0.00 ]
+Key: VPSUBWrm:  [ 0.00  0.00 ]
+Key: VPSUBWrr:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZ:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrmbi:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrmbik:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrmbikz:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrmi:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrmik:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrmikz:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrri:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrrik:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrrikz:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZ:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrmbi:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrmbik:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrmbikz:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrmi:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrmik:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrmikz:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrri:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrrik:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrrikz:  [ 0.00  0.00 ]
+Key: VPTESTMBZ:  [ 0.00  0.00 ]
+Key: VPTESTMBZrm:  [ 0.00  0.00 ]
+Key: VPTESTMBZrmk:  [ 0.00  0.00 ]
+Key: VPTESTMBZrr:  [ 0.00  0.00 ]
+Key: VPTESTMBZrrk:  [ 0.00  0.00 ]
+Key: VPTESTMDZ:  [ 0.00  0.00 ]
+Key: VPTESTMDZrm:  [ 0.00  0.00 ]
+Key: VPTESTMDZrmb:  [ 0.00  0.00 ]
+Key: VPTESTMDZrmbk:  [ 0.00  0.00 ]
+Key: VPTESTMDZrmk:  [ 0.00  0.00 ]
+Key: VPTESTMDZrr:  [ 0.00  0.00 ]
+Key: VPTESTMDZrrk:  [ 0.00  0.00 ]
+Key: VPTESTMQZ:  [ 0.00  0.00 ]
+Key: VPTESTMQZrm:  [ 0.00  0.00 ]
+Key: VPTESTMQZrmb:  [ 0.00  0.00 ]
+Key: VPTESTMQZrmbk:  [ 0.00  0.00 ]
+Key: VPTESTMQZrmk:  [ 0.00  0.00 ]
+Key: VPTESTMQZrr:  [ 0.00  0.00 ]
+Key: VPTESTMQZrrk:  [ 0.00  0.00 ]
+Key: VPTESTMWZ:  [ 0.00  0.00 ]
+Key: VPTESTMWZrm:  [ 0.00  0.00 ]
+Key: VPTESTMWZrmk:  [ 0.00  0.00 ]
+Key: VPTESTMWZrr:  [ 0.00  0.00 ]
+Key: VPTESTMWZrrk:  [ 0.00  0.00 ]
+Key: VPTESTNMBZ:  [ 0.00  0.00 ]
+Key: VPTESTNMBZrm:  [ 0.00  0.00 ]
+Key: VPTESTNMBZrmk:  [ 0.00  0.00 ]
+Key: VPTESTNMBZrr:  [ 0.00  0.00 ]
+Key: VPTESTNMBZrrk:  [ 0.00  0.00 ]
+Key: VPTESTNMDZ:  [ 0.00  0.00 ]
+Key: VPTESTNMDZrm:  [ 0.00  0.00 ]
+Key: VPTESTNMDZrmb:  [ 0.00  0.00 ]
+Key: VPTESTNMDZrmbk:  [ 0.00  0.00 ]
+Key: VPTESTNMDZrmk:  [ 0.00  0.00 ]
+Key: VPTESTNMDZrr:  [ 0.00  0.00 ]
+Key: VPTESTNMDZrrk:  [ 0.00  0.00 ]
+Key: VPTESTNMQZ:  [ 0.00  0.00 ]
+Key: VPTESTNMQZrm:  [ 0.00  0.00 ]
+Key: VPTESTNMQZrmb:  [ 0.00  0.00 ]
+Key: VPTESTNMQZrmbk:  [ 0.00  0.00 ]
+Key: VPTESTNMQZrmk:  [ 0.00  0.00 ]
+Key: VPTESTNMQZrr:  [ 0.00  0.00 ]
+Key: VPTESTNMQZrrk:  [ 0.00  0.00 ]
+Key: VPTESTNMWZ:  [ 0.00  0.00 ]
+Key: VPTESTNMWZrm:  [ 0.00  0.00 ]
+Key: VPTESTNMWZrmk:  [ 0.00  0.00 ]
+Key: VPTESTNMWZrr:  [ 0.00  0.00 ]
+Key: VPTESTNMWZrrk:  [ 0.00  0.00 ]
+Key: VPTESTYrm:  [ 0.00  0.00 ]
+Key: VPTESTYrr:  [ 0.00  0.00 ]
+Key: VPTESTrm:  [ 0.00  0.00 ]
+Key: VPTESTrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWYrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWYrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWZ:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWZrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWZrmk:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWZrmkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWZrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWZrrk:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWZrrkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQYrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQYrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZ:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrmb:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrmbk:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrmbkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrmk:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrmkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrrk:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrrkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQYrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQYrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZ:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrmb:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrmbk:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrmbkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrmk:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrmkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrrk:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrrkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDYrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDYrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDZ:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDZrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDZrmk:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDZrmkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDZrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDZrrk:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDZrrkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWYrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWYrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWZ:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWZrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWZrmk:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWZrmkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWZrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWZrrk:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWZrrkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQYrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQYrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZ:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrmb:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrmbk:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrmbkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrmk:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrmkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrrk:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrrkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQYrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQYrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZ:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrmb:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrmbk:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrmbkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrmk:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrmkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrrk:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrrkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDYrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDYrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDZ:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDZrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDZrmk:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDZrmkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDZrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDZrrk:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDZrrkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDrr:  [ 0.00  0.00 ]
+Key: VPXORDZ:  [ 0.00  0.00 ]
+Key: VPXORDZrm:  [ 0.00  0.00 ]
+Key: VPXORDZrmb:  [ 0.00  0.00 ]
+Key: VPXORDZrmbk:  [ 0.00  0.00 ]
+Key: VPXORDZrmbkz:  [ 0.00  0.00 ]
+Key: VPXORDZrmk:  [ 0.00  0.00 ]
+Key: VPXORDZrmkz:  [ 0.00  0.00 ]
+Key: VPXORDZrr:  [ 0.00  0.00 ]
+Key: VPXORDZrrk:  [ 0.00  0.00 ]
+Key: VPXORDZrrkz:  [ 0.00  0.00 ]
+Key: VPXORQZ:  [ 0.00  0.00 ]
+Key: VPXORQZrm:  [ 0.00  0.00 ]
+Key: VPXORQZrmb:  [ 0.00  0.00 ]
+Key: VPXORQZrmbk:  [ 0.00  0.00 ]
+Key: VPXORQZrmbkz:  [ 0.00  0.00 ]
+Key: VPXORQZrmk:  [ 0.00  0.00 ]
+Key: VPXORQZrmkz:  [ 0.00  0.00 ]
+Key: VPXORQZrr:  [ 0.00  0.00 ]
+Key: VPXORQZrrk:  [ 0.00  0.00 ]
+Key: VPXORQZrrkz:  [ 0.00  0.00 ]
+Key: VPXORYrm:  [ 0.00  0.00 ]
+Key: VPXORYrr:  [ 0.00  0.00 ]
+Key: VPXORrm:  [ 0.00  0.00 ]
+Key: VPXORrr:  [ 0.00  0.00 ]
+Key: VRANGEPDZ:  [ 0.00  0.00 ]
+Key: VRANGEPDZrmbi:  [ 0.00  0.00 ]
+Key: VRANGEPDZrmbik:  [ 0.00  0.00 ]
+Key: VRANGEPDZrmbikz:  [ 0.00  0.00 ]
+Key: VRANGEPDZrmi:  [ 0.00  0.00 ]
+Key: VRANGEPDZrmik:  [ 0.00  0.00 ]
+Key: VRANGEPDZrmikz:  [ 0.00  0.00 ]
+Key: VRANGEPDZrri:  [ 0.00  0.00 ]
+Key: VRANGEPDZrrib:  [ 0.00  0.00 ]
+Key: VRANGEPDZrribk:  [ 0.00  0.00 ]
+Key: VRANGEPDZrribkz:  [ 0.00  0.00 ]
+Key: VRANGEPDZrrik:  [ 0.00  0.00 ]
+Key: VRANGEPDZrrikz:  [ 0.00  0.00 ]
+Key: VRANGEPSZ:  [ 0.00  0.00 ]
+Key: VRANGEPSZrmbi:  [ 0.00  0.00 ]
+Key: VRANGEPSZrmbik:  [ 0.00  0.00 ]
+Key: VRANGEPSZrmbikz:  [ 0.00  0.00 ]
+Key: VRANGEPSZrmi:  [ 0.00  0.00 ]
+Key: VRANGEPSZrmik:  [ 0.00  0.00 ]
+Key: VRANGEPSZrmikz:  [ 0.00  0.00 ]
+Key: VRANGEPSZrri:  [ 0.00  0.00 ]
+Key: VRANGEPSZrrib:  [ 0.00  0.00 ]
+Key: VRANGEPSZrribk:  [ 0.00  0.00 ]
+Key: VRANGEPSZrribkz:  [ 0.00  0.00 ]
+Key: VRANGEPSZrrik:  [ 0.00  0.00 ]
+Key: VRANGEPSZrrikz:  [ 0.00  0.00 ]
+Key: VRANGESDZrmi:  [ 0.00  0.00 ]
+Key: VRANGESDZrmik:  [ 0.00  0.00 ]
+Key: VRANGESDZrmikz:  [ 0.00  0.00 ]
+Key: VRANGESDZrri:  [ 0.00  0.00 ]
+Key: VRANGESDZrrib:  [ 0.00  0.00 ]
+Key: VRANGESDZrribk:  [ 0.00  0.00 ]
+Key: VRANGESDZrribkz:  [ 0.00  0.00 ]
+Key: VRANGESDZrrik:  [ 0.00  0.00 ]
+Key: VRANGESDZrrikz:  [ 0.00  0.00 ]
+Key: VRANGESSZrmi:  [ 0.00  0.00 ]
+Key: VRANGESSZrmik:  [ 0.00  0.00 ]
+Key: VRANGESSZrmikz:  [ 0.00  0.00 ]
+Key: VRANGESSZrri:  [ 0.00  0.00 ]
+Key: VRANGESSZrrib:  [ 0.00  0.00 ]
+Key: VRANGESSZrribk:  [ 0.00  0.00 ]
+Key: VRANGESSZrribkz:  [ 0.00  0.00 ]
+Key: VRANGESSZrrik:  [ 0.00  0.00 ]
+Key: VRANGESSZrrikz:  [ 0.00  0.00 ]
+Key: VRCP:  [ 0.00  0.00 ]
+Key: VRCPBF:  [ 0.00  0.00 ]
+Key: VRCPPHZ:  [ 0.00  0.00 ]
+Key: VRCPPHZm:  [ 0.00  0.00 ]
+Key: VRCPPHZmb:  [ 0.00  0.00 ]
+Key: VRCPPHZmbk:  [ 0.00  0.00 ]
+Key: VRCPPHZmbkz:  [ 0.00  0.00 ]
+Key: VRCPPHZmk:  [ 0.00  0.00 ]
+Key: VRCPPHZmkz:  [ 0.00  0.00 ]
+Key: VRCPPHZr:  [ 0.00  0.00 ]
+Key: VRCPPHZrk:  [ 0.00  0.00 ]
+Key: VRCPPHZrkz:  [ 0.00  0.00 ]
+Key: VRCPPSYm:  [ 0.00  0.00 ]
+Key: VRCPPSYr:  [ 0.00  0.00 ]
+Key: VRCPPSm:  [ 0.00  0.00 ]
+Key: VRCPPSr:  [ 0.00  0.00 ]
+Key: VRCPSHZrm:  [ 0.00  0.00 ]
+Key: VRCPSHZrmk:  [ 0.00  0.00 ]
+Key: VRCPSHZrmkz:  [ 0.00  0.00 ]
+Key: VRCPSHZrr:  [ 0.00  0.00 ]
+Key: VRCPSHZrrk:  [ 0.00  0.00 ]
+Key: VRCPSHZrrkz:  [ 0.00  0.00 ]
+Key: VRCPSSm:  [ 0.00  0.00 ]
+Key: VRCPSSm_Int:  [ 0.00  0.00 ]
+Key: VRCPSSr:  [ 0.00  0.00 ]
+Key: VRCPSSr_Int:  [ 0.00  0.00 ]
+Key: VREDUCEBF:  [ 0.00  0.00 ]
+Key: VREDUCEPDZ:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrmbi:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrmbik:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrmbikz:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrmi:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrmik:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrmikz:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrri:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrrib:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrribk:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrribkz:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrrik:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrrikz:  [ 0.00  0.00 ]
+Key: VREDUCEPHZ:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrmbi:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrmbik:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrmbikz:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrmi:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrmik:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrmikz:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrri:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrrib:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrribk:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrribkz:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrrik:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrrikz:  [ 0.00  0.00 ]
+Key: VREDUCEPSZ:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrmbi:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrmbik:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrmbikz:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrmi:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrmik:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrmikz:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrri:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrrib:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrribk:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrribkz:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrrik:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrrikz:  [ 0.00  0.00 ]
+Key: VREDUCESDZrmi:  [ 0.00  0.00 ]
+Key: VREDUCESDZrmik:  [ 0.00  0.00 ]
+Key: VREDUCESDZrmikz:  [ 0.00  0.00 ]
+Key: VREDUCESDZrri:  [ 0.00  0.00 ]
+Key: VREDUCESDZrrib:  [ 0.00  0.00 ]
+Key: VREDUCESDZrribk:  [ 0.00  0.00 ]
+Key: VREDUCESDZrribkz:  [ 0.00  0.00 ]
+Key: VREDUCESDZrrik:  [ 0.00  0.00 ]
+Key: VREDUCESDZrrikz:  [ 0.00  0.00 ]
+Key: VREDUCESHZrmi:  [ 0.00  0.00 ]
+Key: VREDUCESHZrmik:  [ 0.00  0.00 ]
+Key: VREDUCESHZrmikz:  [ 0.00  0.00 ]
+Key: VREDUCESHZrri:  [ 0.00  0.00 ]
+Key: VREDUCESHZrrib:  [ 0.00  0.00 ]
+Key: VREDUCESHZrribk:  [ 0.00  0.00 ]
+Key: VREDUCESHZrribkz:  [ 0.00  0.00 ]
+Key: VREDUCESHZrrik:  [ 0.00  0.00 ]
+Key: VREDUCESHZrrikz:  [ 0.00  0.00 ]
+Key: VREDUCESSZrmi:  [ 0.00  0.00 ]
+Key: VREDUCESSZrmik:  [ 0.00  0.00 ]
+Key: VREDUCESSZrmikz:  [ 0.00  0.00 ]
+Key: VREDUCESSZrri:  [ 0.00  0.00 ]
+Key: VREDUCESSZrrib:  [ 0.00  0.00 ]
+Key: VREDUCESSZrribk:  [ 0.00  0.00 ]
+Key: VREDUCESSZrribkz:  [ 0.00  0.00 ]
+Key: VREDUCESSZrrik:  [ 0.00  0.00 ]
+Key: VREDUCESSZrrikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEBF:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZ:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrmbi:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrmbik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrmbikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrmi:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrmik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrmikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrri:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrrib:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrribk:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrribkz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrrik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrrikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZ:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrmbi:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrmbik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrmbikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrmi:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrmik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrmikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrri:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrrib:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrribk:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrribkz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrrik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrrikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZ:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrmbi:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrmbik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrmbikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrmi:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrmik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrmikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrri:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrrib:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrribk:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrribkz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrrik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrrikz:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrmi:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrmi_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrmik_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrmikz_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrri:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrri_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrrib_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrribk_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrribkz_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrrik_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrrikz_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrmi:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrmi_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrmik_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrmikz_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrri:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrri_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrrib_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrribk_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrribkz_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrrik_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrrikz_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrmi:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrmi_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrmik_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrmikz_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrri:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrri_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrrib_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrribk_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrribkz_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrrik_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrrikz_Int:  [ 0.00  0.00 ]
+Key: VROUNDPDYmi:  [ 0.00  0.00 ]
+Key: VROUNDPDYri:  [ 0.00  0.00 ]
+Key: VROUNDPDmi:  [ 0.00  0.00 ]
+Key: VROUNDPDri:  [ 0.00  0.00 ]
+Key: VROUNDPSYmi:  [ 0.00  0.00 ]
+Key: VROUNDPSYri:  [ 0.00  0.00 ]
+Key: VROUNDPSmi:  [ 0.00  0.00 ]
+Key: VROUNDPSri:  [ 0.00  0.00 ]
+Key: VROUNDSDmi:  [ 0.00  0.00 ]
+Key: VROUNDSDmi_Int:  [ 0.00  0.00 ]
+Key: VROUNDSDri:  [ 0.00  0.00 ]
+Key: VROUNDSDri_Int:  [ 0.00  0.00 ]
+Key: VROUNDSSmi:  [ 0.00  0.00 ]
+Key: VROUNDSSmi_Int:  [ 0.00  0.00 ]
+Key: VROUNDSSri:  [ 0.00  0.00 ]
+Key: VROUNDSSri_Int:  [ 0.00  0.00 ]
+Key: VRSQRT:  [ 0.00  0.00 ]
+Key: VRSQRTBF:  [ 0.00  0.00 ]
+Key: VRSQRTPHZ:  [ 0.00  0.00 ]
+Key: VRSQRTPHZm:  [ 0.00  0.00 ]
+Key: VRSQRTPHZmb:  [ 0.00  0.00 ]
+Key: VRSQRTPHZmbk:  [ 0.00  0.00 ]
+Key: VRSQRTPHZmbkz:  [ 0.00  0.00 ]
+Key: VRSQRTPHZmk:  [ 0.00  0.00 ]
+Key: VRSQRTPHZmkz:  [ 0.00  0.00 ]
+Key: VRSQRTPHZr:  [ 0.00  0.00 ]
+Key: VRSQRTPHZrk:  [ 0.00  0.00 ]
+Key: VRSQRTPHZrkz:  [ 0.00  0.00 ]
+Key: VRSQRTPSYm:  [ 0.00  0.00 ]
+Key: VRSQRTPSYr:  [ 0.00  0.00 ]
+Key: VRSQRTPSm:  [ 0.00  0.00 ]
+Key: VRSQRTPSr:  [ 0.00  0.00 ]
+Key: VRSQRTSHZrm:  [ 0.00  0.00 ]
+Key: VRSQRTSHZrmk:  [ 0.00  0.00 ]
+Key: VRSQRTSHZrmkz:  [ 0.00  0.00 ]
+Key: VRSQRTSHZrr:  [ 0.00  0.00 ]
+Key: VRSQRTSHZrrk:  [ 0.00  0.00 ]
+Key: VRSQRTSHZrrkz:  [ 0.00  0.00 ]
+Key: VRSQRTSSm:  [ 0.00  0.00 ]
+Key: VRSQRTSSm_Int:  [ 0.00  0.00 ]
+Key: VRSQRTSSr:  [ 0.00  0.00 ]
+Key: VRSQRTSSr_Int:  [ 0.00  0.00 ]
+Key: VSCALEFBF:  [ 0.00  0.00 ]
+Key: VSCALEFPDZ:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrm:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrmb:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrmbk:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrmbkz:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrmk:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrmkz:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrr:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrrb:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrrbk:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrrbkz:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrrk:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrrkz:  [ 0.00  0.00 ]
+Key: VSCALEFPHZ:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrm:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrmb:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrmbk:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrmbkz:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrmk:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrmkz:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrr:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrrb:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrrbk:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrrbkz:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrrk:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrrkz:  [ 0.00  0.00 ]
+Key: VSCALEFPSZ:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrm:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrmb:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrmbk:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrmbkz:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrmk:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrmkz:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrr:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrrb:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrrbk:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrrbkz:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrrk:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrrkz:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrm:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrmk:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrmkz:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrr:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrrbk_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrrk:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrrkz:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrm:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrmk:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrmkz:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrr:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrrbk_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrrk:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrrkz:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrm:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrmk:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrmkz:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrr:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrrbk_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrrk:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrrkz:  [ 0.00  0.00 ]
+Key: VSCATTERDPDZ:  [ 0.00  0.00 ]
+Key: VSCATTERDPDZmr:  [ 0.00  0.00 ]
+Key: VSCATTERDPSZ:  [ 0.00  0.00 ]
+Key: VSCATTERDPSZmr:  [ 0.00  0.00 ]
+Key: VSCATTERPF:  [ 0.00  0.00 ]
+Key: VSCATTERQPDZ:  [ 0.00  0.00 ]
+Key: VSCATTERQPDZmr:  [ 0.00  0.00 ]
+Key: VSCATTERQPSZ:  [ 0.00  0.00 ]
+Key: VSCATTERQPSZmr:  [ 0.00  0.00 ]
+Key: VSHA:  [ 0.00  0.00 ]
+Key: VSHUFF:  [ 0.00  0.00 ]
+Key: VSHUFI:  [ 0.00  0.00 ]
+Key: VSHUFPDYrmi:  [ 0.00  0.00 ]
+Key: VSHUFPDYrri:  [ 0.00  0.00 ]
+Key: VSHUFPDZ:  [ 0.00  0.00 ]
+Key: VSHUFPDZrmbi:  [ 0.00  0.00 ]
+Key: VSHUFPDZrmbik:  [ 0.00  0.00 ]
+Key: VSHUFPDZrmbikz:  [ 0.00  0.00 ]
+Key: VSHUFPDZrmi:  [ 0.00  0.00 ]
+Key: VSHUFPDZrmik:  [ 0.00  0.00 ]
+Key: VSHUFPDZrmikz:  [ 0.00  0.00 ]
+Key: VSHUFPDZrri:  [ 0.00  0.00 ]
+Key: VSHUFPDZrrik:  [ 0.00  0.00 ]
+Key: VSHUFPDZrrikz:  [ 0.00  0.00 ]
+Key: VSHUFPDrmi:  [ 0.00  0.00 ]
+Key: VSHUFPDrri:  [ 0.00  0.00 ]
+Key: VSHUFPSYrmi:  [ 0.00  0.00 ]
+Key: VSHUFPSYrri:  [ 0.00  0.00 ]
+Key: VSHUFPSZ:  [ 0.00  0.00 ]
+Key: VSHUFPSZrmbi:  [ 0.00  0.00 ]
+Key: VSHUFPSZrmbik:  [ 0.00  0.00 ]
+Key: VSHUFPSZrmbikz:  [ 0.00  0.00 ]
+Key: VSHUFPSZrmi:  [ 0.00  0.00 ]
+Key: VSHUFPSZrmik:  [ 0.00  0.00 ]
+Key: VSHUFPSZrmikz:  [ 0.00  0.00 ]
+Key: VSHUFPSZrri:  [ 0.00  0.00 ]
+Key: VSHUFPSZrrik:  [ 0.00  0.00 ]
+Key: VSHUFPSZrrikz:  [ 0.00  0.00 ]
+Key: VSHUFPSrmi:  [ 0.00  0.00 ]
+Key: VSHUFPSrri:  [ 0.00  0.00 ]
+Key: VSM:  [ 0.00  0.00 ]
+Key: VSQRTBF:  [ 0.00  0.00 ]
+Key: VSQRTPDYm:  [ 0.00  0.00 ]
+Key: VSQRTPDYr:  [ 0.00  0.00 ]
+Key: VSQRTPDZ:  [ 0.00  0.00 ]
+Key: VSQRTPDZm:  [ 0.00  0.00 ]
+Key: VSQRTPDZmb:  [ 0.00  0.00 ]
+Key: VSQRTPDZmbk:  [ 0.00  0.00 ]
+Key: VSQRTPDZmbkz:  [ 0.00  0.00 ]
+Key: VSQRTPDZmk:  [ 0.00  0.00 ]
+Key: VSQRTPDZmkz:  [ 0.00  0.00 ]
+Key: VSQRTPDZr:  [ 0.00  0.00 ]
+Key: VSQRTPDZrb:  [ 0.00  0.00 ]
+Key: VSQRTPDZrbk:  [ 0.00  0.00 ]
+Key: VSQRTPDZrbkz:  [ 0.00  0.00 ]
+Key: VSQRTPDZrk:  [ 0.00  0.00 ]
+Key: VSQRTPDZrkz:  [ 0.00  0.00 ]
+Key: VSQRTPDm:  [ 0.00  0.00 ]
+Key: VSQRTPDr:  [ 0.00  0.00 ]
+Key: VSQRTPHZ:  [ 0.00  0.00 ]
+Key: VSQRTPHZm:  [ 0.00  0.00 ]
+Key: VSQRTPHZmb:  [ 0.00  0.00 ]
+Key: VSQRTPHZmbk:  [ 0.00  0.00 ]
+Key: VSQRTPHZmbkz:  [ 0.00  0.00 ]
+Key: VSQRTPHZmk:  [ 0.00  0.00 ]
+Key: VSQRTPHZmkz:  [ 0.00  0.00 ]
+Key: VSQRTPHZr:  [ 0.00  0.00 ]
+Key: VSQRTPHZrb:  [ 0.00  0.00 ]
+Key: VSQRTPHZrbk:  [ 0.00  0.00 ]
+Key: VSQRTPHZrbkz:  [ 0.00  0.00 ]
+Key: VSQRTPHZrk:  [ 0.00  0.00 ]
+Key: VSQRTPHZrkz:  [ 0.00  0.00 ]
+Key: VSQRTPSYm:  [ 0.00  0.00 ]
+Key: VSQRTPSYr:  [ 0.00  0.00 ]
+Key: VSQRTPSZ:  [ 0.00  0.00 ]
+Key: VSQRTPSZm:  [ 0.00  0.00 ]
+Key: VSQRTPSZmb:  [ 0.00  0.00 ]
+Key: VSQRTPSZmbk:  [ 0.00  0.00 ]
+Key: VSQRTPSZmbkz:  [ 0.00  0.00 ]
+Key: VSQRTPSZmk:  [ 0.00  0.00 ]
+Key: VSQRTPSZmkz:  [ 0.00  0.00 ]
+Key: VSQRTPSZr:  [ 0.00  0.00 ]
+Key: VSQRTPSZrb:  [ 0.00  0.00 ]
+Key: VSQRTPSZrbk:  [ 0.00  0.00 ]
+Key: VSQRTPSZrbkz:  [ 0.00  0.00 ]
+Key: VSQRTPSZrk:  [ 0.00  0.00 ]
+Key: VSQRTPSZrkz:  [ 0.00  0.00 ]
+Key: VSQRTPSm:  [ 0.00  0.00 ]
+Key: VSQRTPSr:  [ 0.00  0.00 ]
+Key: VSQRTSDZm:  [ 0.00  0.00 ]
+Key: VSQRTSDZm_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDZmk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDZmkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDZr:  [ 0.00  0.00 ]
+Key: VSQRTSDZr_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDZrb_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDZrbk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDZrbkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDZrk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDZrkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDm:  [ 0.00  0.00 ]
+Key: VSQRTSDm_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDr:  [ 0.00  0.00 ]
+Key: VSQRTSDr_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZm:  [ 0.00  0.00 ]
+Key: VSQRTSHZm_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZmk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZmkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZr:  [ 0.00  0.00 ]
+Key: VSQRTSHZr_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZrb_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZrbk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZrbkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZrk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZrkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZm:  [ 0.00  0.00 ]
+Key: VSQRTSSZm_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZmk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZmkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZr:  [ 0.00  0.00 ]
+Key: VSQRTSSZr_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZrb_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZrbk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZrbkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZrk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZrkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSm:  [ 0.00  0.00 ]
+Key: VSQRTSSm_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSr:  [ 0.00  0.00 ]
+Key: VSQRTSSr_Int:  [ 0.00  0.00 ]
+Key: VSTMXCSR:  [ 0.00  0.00 ]
+Key: VSUBBF:  [ 0.00  0.00 ]
+Key: VSUBPDYrm:  [ 0.00  0.00 ]
+Key: VSUBPDYrr:  [ 0.00  0.00 ]
+Key: VSUBPDZ:  [ 0.00  0.00 ]
+Key: VSUBPDZrm:  [ 0.00  0.00 ]
+Key: VSUBPDZrmb:  [ 0.00  0.00 ]
+Key: VSUBPDZrmbk:  [ 0.00  0.00 ]
+Key: VSUBPDZrmbkz:  [ 0.00  0.00 ]
+Key: VSUBPDZrmk:  [ 0.00  0.00 ]
+Key: VSUBPDZrmkz:  [ 0.00  0.00 ]
+Key: VSUBPDZrr:  [ 0.00  0.00 ]
+Key: VSUBPDZrrb:  [ 0.00  0.00 ]
+Key: VSUBPDZrrbk:  [ 0.00  0.00 ]
+Key: VSUBPDZrrbkz:  [ 0.00  0.00 ]
+Key: VSUBPDZrrk:  [ 0.00  0.00 ]
+Key: VSUBPDZrrkz:  [ 0.00  0.00 ]
+Key: VSUBPDrm:  [ 0.00  0.00 ]
+Key: VSUBPDrr:  [ 0.00  0.00 ]
+Key: VSUBPHZ:  [ 0.00  0.00 ]
+Key: VSUBPHZrm:  [ 0.00  0.00 ]
+Key: VSUBPHZrmb:  [ 0.00  0.00 ]
+Key: VSUBPHZrmbk:  [ 0.00  0.00 ]
+Key: VSUBPHZrmbkz:  [ 0.00  0.00 ]
+Key: VSUBPHZrmk:  [ 0.00  0.00 ]
+Key: VSUBPHZrmkz:  [ 0.00  0.00 ]
+Key: VSUBPHZrr:  [ 0.00  0.00 ]
+Key: VSUBPHZrrb:  [ 0.00  0.00 ]
+Key: VSUBPHZrrbk:  [ 0.00  0.00 ]
+Key: VSUBPHZrrbkz:  [ 0.00  0.00 ]
+Key: VSUBPHZrrk:  [ 0.00  0.00 ]
+Key: VSUBPHZrrkz:  [ 0.00  0.00 ]
+Key: VSUBPSYrm:  [ 0.00  0.00 ]
+Key: VSUBPSYrr:  [ 0.00  0.00 ]
+Key: VSUBPSZ:  [ 0.00  0.00 ]
+Key: VSUBPSZrm:  [ 0.00  0.00 ]
+Key: VSUBPSZrmb:  [ 0.00  0.00 ]
+Key: VSUBPSZrmbk:  [ 0.00  0.00 ]
+Key: VSUBPSZrmbkz:  [ 0.00  0.00 ]
+Key: VSUBPSZrmk:  [ 0.00  0.00 ]
+Key: VSUBPSZrmkz:  [ 0.00  0.00 ]
+Key: VSUBPSZrr:  [ 0.00  0.00 ]
+Key: VSUBPSZrrb:  [ 0.00  0.00 ]
+Key: VSUBPSZrrbk:  [ 0.00  0.00 ]
+Key: VSUBPSZrrbkz:  [ 0.00  0.00 ]
+Key: VSUBPSZrrk:  [ 0.00  0.00 ]
+Key: VSUBPSZrrkz:  [ 0.00  0.00 ]
+Key: VSUBPSrm:  [ 0.00  0.00 ]
+Key: VSUBPSrr:  [ 0.00  0.00 ]
+Key: VSUBSDZrm:  [ 0.00  0.00 ]
+Key: VSUBSDZrm_Int:  [ 0.00  0.00 ]
+Key: VSUBSDZrmk_Int:  [ 0.00  0.00 ]
+Key: VSUBSDZrmkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSDZrr:  [ 0.00  0.00 ]
+Key: VSUBSDZrr_Int:  [ 0.00  0.00 ]
+Key: VSUBSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VSUBSDZrrbk_Int:  [ 0.00  0.00 ]
+Key: VSUBSDZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSDZrrk_Int:  [ 0.00  0.00 ]
+Key: VSUBSDZrrkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSDrm:  [ 0.00  0.00 ]
+Key: VSUBSDrm_Int:  [ 0.00  0.00 ]
+Key: VSUBSDrr:  [ 0.00  0.00 ]
+Key: VSUBSDrr_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrm:  [ 0.00  0.00 ]
+Key: VSUBSHZrm_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrmk_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrmkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrr:  [ 0.00  0.00 ]
+Key: VSUBSHZrr_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrrbk_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrrk_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrrkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrm:  [ 0.00  0.00 ]
+Key: VSUBSSZrm_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrmk_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrmkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrr:  [ 0.00  0.00 ]
+Key: VSUBSSZrr_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrrbk_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrrk_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrrkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSSrm:  [ 0.00  0.00 ]
+Key: VSUBSSrm_Int:  [ 0.00  0.00 ]
+Key: VSUBSSrr:  [ 0.00  0.00 ]
+Key: VSUBSSrr_Int:  [ 0.00  0.00 ]
+Key: VTESTPDYrm:  [ 0.00  0.00 ]
+Key: VTESTPDYrr:  [ 0.00  0.00 ]
+Key: VTESTPDrm:  [ 0.00  0.00 ]
+Key: VTESTPDrr:  [ 0.00  0.00 ]
+Key: VTESTPSYrm:  [ 0.00  0.00 ]
+Key: VTESTPSYrr:  [ 0.00  0.00 ]
+Key: VTESTPSrm:  [ 0.00  0.00 ]
+Key: VTESTPSrr:  [ 0.00  0.00 ]
+Key: VUCOMISDZrm:  [ 0.00  0.00 ]
+Key: VUCOMISDZrm_Int:  [ 0.00  0.00 ]
+Key: VUCOMISDZrr:  [ 0.00  0.00 ]
+Key: VUCOMISDZrr_Int:  [ 0.00  0.00 ]
+Key: VUCOMISDZrrb:  [ 0.00  0.00 ]
+Key: VUCOMISDrm:  [ 0.00  0.00 ]
+Key: VUCOMISDrm_Int:  [ 0.00  0.00 ]
+Key: VUCOMISDrr:  [ 0.00  0.00 ]
+Key: VUCOMISDrr_Int:  [ 0.00  0.00 ]
+Key: VUCOMISHZrm:  [ 0.00  0.00 ]
+Key: VUCOMISHZrm_Int:  [ 0.00  0.00 ]
+Key: VUCOMISHZrr:  [ 0.00  0.00 ]
+Key: VUCOMISHZrr_Int:  [ 0.00  0.00 ]
+Key: VUCOMISHZrrb:  [ 0.00  0.00 ]
+Key: VUCOMISSZrm:  [ 0.00  0.00 ]
+Key: VUCOMISSZrm_Int:  [ 0.00  0.00 ]
+Key: VUCOMISSZrr:  [ 0.00  0.00 ]
+Key: VUCOMISSZrr_Int:  [ 0.00  0.00 ]
+Key: VUCOMISSZrrb:  [ 0.00  0.00 ]
+Key: VUCOMISSrm:  [ 0.00  0.00 ]
+Key: VUCOMISSrm_Int:  [ 0.00  0.00 ]
+Key: VUCOMISSrr:  [ 0.00  0.00 ]
+Key: VUCOMISSrr_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSDZrm:  [ 0.00  0.00 ]
+Key: VUCOMXSDZrm_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSDZrr:  [ 0.00  0.00 ]
+Key: VUCOMXSDZrr_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSHZrm:  [ 0.00  0.00 ]
+Key: VUCOMXSHZrm_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSHZrr:  [ 0.00  0.00 ]
+Key: VUCOMXSHZrr_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSSZrm:  [ 0.00  0.00 ]
+Key: VUCOMXSSZrm_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSSZrr:  [ 0.00  0.00 ]
+Key: VUCOMXSSZrr_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VUNPCKHPDYrm:  [ 0.00  0.00 ]
+Key: VUNPCKHPDYrr:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZ:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrm:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrmb:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrmbk:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrmbkz:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrmk:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrmkz:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrr:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrrk:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrrkz:  [ 0.00  0.00 ]
+Key: VUNPCKHPDrm:  [ 0.00  0.00 ]
+Key: VUNPCKHPDrr:  [ 0.00  0.00 ]
+Key: VUNPCKHPSYrm:  [ 0.00  0.00 ]
+Key: VUNPCKHPSYrr:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZ:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrm:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrmb:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrmbk:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrmbkz:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrmk:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrmkz:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrr:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrrk:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrrkz:  [ 0.00  0.00 ]
+Key: VUNPCKHPSrm:  [ 0.00  0.00 ]
+Key: VUNPCKHPSrr:  [ 0.00  0.00 ]
+Key: VUNPCKLPDYrm:  [ 0.00  0.00 ]
+Key: VUNPCKLPDYrr:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZ:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrm:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrmb:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrmbk:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrmbkz:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrmk:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrmkz:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrr:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrrk:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrrkz:  [ 0.00  0.00 ]
+Key: VUNPCKLPDrm:  [ 0.00  0.00 ]
+Key: VUNPCKLPDrr:  [ 0.00  0.00 ]
+Key: VUNPCKLPSYrm:  [ 0.00  0.00 ]
+Key: VUNPCKLPSYrr:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZ:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrm:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrmb:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrmbk:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrmbkz:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrmk:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrmkz:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrr:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrrk:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrrkz:  [ 0.00  0.00 ]
+Key: VUNPCKLPSrm:  [ 0.00  0.00 ]
+Key: VUNPCKLPSrr:  [ 0.00  0.00 ]
+Key: VXORPDYrm:  [ 0.00  0.00 ]
+Key: VXORPDYrr:  [ 0.00  0.00 ]
+Key: VXORPDZ:  [ 0.00  0.00 ]
+Key: VXORPDZrm:  [ 0.00  0.00 ]
+Key: VXORPDZrmb:  [ 0.00  0.00 ]
+Key: VXORPDZrmbk:  [ 0.00  0.00 ]
+Key: VXORPDZrmbkz:  [ 0.00  0.00 ]
+Key: VXORPDZrmk:  [ 0.00  0.00 ]
+Key: VXORPDZrmkz:  [ 0.00  0.00 ]
+Key: VXORPDZrr:  [ 0.00  0.00 ]
+Key: VXORPDZrrk:  [ 0.00  0.00 ]
+Key: VXORPDZrrkz:  [ 0.00  0.00 ]
+Key: VXORPDrm:  [ 0.00  0.00 ]
+Key: VXORPDrr:  [ 0.00  0.00 ]
+Key: VXORPSYrm:  [ 0.00  0.00 ]
+Key: VXORPSYrr:  [ 0.00  0.00 ]
+Key: VXORPSZ:  [ 0.00  0.00 ]
+Key: VXORPSZrm:  [ 0.00  0.00 ]
+Key: VXORPSZrmb:  [ 0.00  0.00 ]
+Key: VXORPSZrmbk:  [ 0.00  0.00 ]
+Key: VXORPSZrmbkz:  [ 0.00  0.00 ]
+Key: VXORPSZrmk:  [ 0.00  0.00 ]
+Key: VXORPSZrmkz:  [ 0.00  0.00 ]
+Key: VXORPSZrr:  [ 0.00  0.00 ]
+Key: VXORPSZrrk:  [ 0.00  0.00 ]
+Key: VXORPSZrrkz:  [ 0.00  0.00 ]
+Key: VXORPSrm:  [ 0.00  0.00 ]
+Key: VXORPSrr:  [ 0.00  0.00 ]
+Key: VZEROALL:  [ 0.00  0.00 ]
+Key: VZEROUPPER:  [ 0.00  0.00 ]
+Key: V_SET:  [ 0.00  0.00 ]
+Key: V_SETALLONES:  [ 0.00  0.00 ]
+Key: WAIT:  [ 0.00  0.00 ]
+Key: WBINVD:  [ 0.00  0.00 ]
+Key: WBNOINVD:  [ 0.00  0.00 ]
+Key: WRFLAGS:  [ 0.00  0.00 ]
+Key: WRFSBASE:  [ 0.00  0.00 ]
+Key: WRGSBASE:  [ 0.00  0.00 ]
+Key: WRMSR:  [ 0.00  0.00 ]
+Key: WRMSRLIST:  [ 0.00  0.00 ]
+Key: WRMSRNS:  [ 0.00  0.00 ]
+Key: WRMSRNSir:  [ 0.00  0.00 ]
+Key: WRMSRNSir_EVEX:  [ 0.00  0.00 ]
+Key: WRPKRUr:  [ 0.00  0.00 ]
+Key: WRSSD:  [ 0.00  0.00 ]
+Key: WRSSD_EVEX:  [ 0.00  0.00 ]
+Key: WRSSQ:  [ 0.00  0.00 ]
+Key: WRSSQ_EVEX:  [ 0.00  0.00 ]
+Key: WRUSSD:  [ 0.00  0.00 ]
+Key: WRUSSD_EVEX:  [ 0.00  0.00 ]
+Key: WRUSSQ:  [ 0.00  0.00 ]
+Key: WRUSSQ_EVEX:  [ 0.00  0.00 ]
+Key: XABORT:  [ 0.00  0.00 ]
+Key: XABORT_DEF:  [ 0.00  0.00 ]
+Key: XACQUIRE_PREFIX:  [ 0.00  0.00 ]
+Key: XADD:  [ 0.00  0.00 ]
+Key: XAM_F:  [ 0.00  0.00 ]
+Key: XAM_Fp:  [ 0.00  0.00 ]
+Key: XBEGIN:  [ 0.00  0.00 ]
+Key: XCHG:  [ 0.00  0.00 ]
+Key: XCH_F:  [ 0.00  0.00 ]
+Key: XCRYPTCBC:  [ 0.00  0.00 ]
+Key: XCRYPTCFB:  [ 0.00  0.00 ]
+Key: XCRYPTCTR:  [ 0.00  0.00 ]
+Key: XCRYPTECB:  [ 0.00  0.00 ]
+Key: XCRYPTOFB:  [ 0.00  0.00 ]
+Key: XEND:  [ 0.00  0.00 ]
+Key: XGETBV:  [ 0.00  0.00 ]
+Key: XLAT:  [ 0.00  0.00 ]
+Key: XOR:  [ 0.00  0.00 ]
+Key: XORPDrm:  [ 0.00  0.00 ]
+Key: XORPDrr:  [ 0.00  0.00 ]
+Key: XORPSrm:  [ 0.00  0.00 ]
+Key: XORPSrr:  [ 0.00  0.00 ]
+Key: XRELEASE_PREFIX:  [ 0.00  0.00 ]
+Key: XRESLDTRK:  [ 0.00  0.00 ]
+Key: XRSTOR:  [ 0.00  0.00 ]
+Key: XRSTORS:  [ 0.00  0.00 ]
+Key: XSAVE:  [ 0.00  0.00 ]
+Key: XSAVEC:  [ 0.00  0.00 ]
+Key: XSAVEOPT:  [ 0.00  0.00 ]
+Key: XSAVES:  [ 0.00  0.00 ]
+Key: XSETBV:  [ 0.00  0.00 ]
+Key: XSHA:  [ 0.00  0.00 ]
+Key: XSTORE:  [ 0.00  0.00 ]
+Key: XSUSLDTRK:  [ 0.00  0.00 ]
+Key: XTEST:  [ 0.00  0.00 ]
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt
new file mode 100644
index 0000000..4409e6d
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt
@@ -0,0 +1,6882 @@
+Key: AAA:  [ 0.00  0.00 ]
+Key: AAD:  [ 0.00  0.00 ]
+Key: AADD:  [ 0.00  0.00 ]
+Key: AAM:  [ 0.00  0.00 ]
+Key: AAND:  [ 0.00  0.00 ]
+Key: AAS:  [ 0.00  0.00 ]
+Key: ABS_F:  [ 0.00  0.00 ]
+Key: ABS_Fp:  [ 0.50  1.00 ]
+Key: ADC:  [ 1.50  2.00 ]
+Key: ADCX:  [ 0.00  0.00 ]
+Key: ADD:  [ 2.50  3.00 ]
+Key: ADDPDrm:  [ 3.50  4.00 ]
+Key: ADDPDrr:  [ 4.50  5.00 ]
+Key: ADDPSrm:  [ 0.00  0.00 ]
+Key: ADDPSrr:  [ 5.50  6.00 ]
+Key: ADDR:  [ 0.00  0.00 ]
+Key: ADDSDrm:  [ 6.50  7.00 ]
+Key: ADDSDrm_Int:  [ 0.00  0.00 ]
+Key: ADDSDrr:  [ 0.00  0.00 ]
+Key: ADDSDrr_Int:  [ 0.00  0.00 ]
+Key: ADDSSrm:  [ 0.00  0.00 ]
+Key: ADDSSrm_Int:  [ 0.00  0.00 ]
+Key: ADDSSrr:  [ 0.00  0.00 ]
+Key: ADDSSrr_Int:  [ 0.00  0.00 ]
+Key: ADDSUBPDrm:  [ 0.00  0.00 ]
+Key: ADDSUBPDrr:  [ 0.00  0.00 ]
+Key: ADDSUBPSrm:  [ 0.00  0.00 ]
+Key: ADDSUBPSrr:  [ 0.00  0.00 ]
+Key: ADD_F:  [ 0.00  0.00 ]
+Key: ADD_FI:  [ 0.00  0.00 ]
+Key: ADD_FPrST:  [ 0.00  0.00 ]
+Key: ADD_FST:  [ 0.00  0.00 ]
+Key: ADD_Fp:  [ 0.00  0.00 ]
+Key: ADD_FpI:  [ 0.00  0.00 ]
+Key: ADD_FrST:  [ 0.00  0.00 ]
+Key: ADJCALLSTACKDOWN:  [ 0.00  0.00 ]
+Key: ADJCALLSTACKUP:  [ 0.00  0.00 ]
+Key: ADOX:  [ 0.00  0.00 ]
+Key: AESDEC:  [ 0.00  0.00 ]
+Key: AESDECLASTrm:  [ 0.00  0.00 ]
+Key: AESDECLASTrr:  [ 0.00  0.00 ]
+Key: AESDECWIDE:  [ 0.00  0.00 ]
+Key: AESDECrm:  [ 0.00  0.00 ]
+Key: AESDECrr:  [ 0.00  0.00 ]
+Key: AESENC:  [ 0.00  0.00 ]
+Key: AESENCLASTrm:  [ 0.00  0.00 ]
+Key: AESENCLASTrr:  [ 0.00  0.00 ]
+Key: AESENCWIDE:  [ 0.00  0.00 ]
+Key: AESENCrm:  [ 0.00  0.00 ]
+Key: AESENCrr:  [ 0.00  0.00 ]
+Key: AESIMCrm:  [ 0.00  0.00 ]
+Key: AESIMCrr:  [ 0.00  0.00 ]
+Key: AESKEYGENASSISTrmi:  [ 0.00  0.00 ]
+Key: AESKEYGENASSISTrri:  [ 0.00  0.00 ]
+Key: AND:  [ 0.00  0.00 ]
+Key: ANDN:  [ 0.00  0.00 ]
+Key: ANDNPDrm:  [ 0.00  0.00 ]
+Key: ANDNPDrr:  [ 0.00  0.00 ]
+Key: ANDNPSrm:  [ 0.00  0.00 ]
+Key: ANDNPSrr:  [ 0.00  0.00 ]
+Key: ANDPDrm:  [ 0.00  0.00 ]
+Key: ANDPDrr:  [ 0.00  0.00 ]
+Key: ANDPSrm:  [ 0.00  0.00 ]
+Key: ANDPSrr:  [ 0.00  0.00 ]
+Key: ANNOTATION_LABEL:  [ 0.00  0.00 ]
+Key: AOR:  [ 0.00  0.00 ]
+Key: ARITH_FENCE:  [ 0.00  0.00 ]
+Key: ARPL:  [ 0.00  0.00 ]
+Key: ASAN_CHECK_MEMACCESS:  [ 0.00  0.00 ]
+Key: AVX:  [ 0.00  0.00 ]
+Key: AVX_SET:  [ 0.00  0.00 ]
+Key: AXOR:  [ 0.00  0.00 ]
+Key: BEXTR:  [ 0.00  0.00 ]
+Key: BEXTRI:  [ 0.00  0.00 ]
+Key: BLCFILL:  [ 0.00  0.00 ]
+Key: BLCI:  [ 0.00  0.00 ]
+Key: BLCIC:  [ 0.00  0.00 ]
+Key: BLCMSK:  [ 0.00  0.00 ]
+Key: BLCS:  [ 0.00  0.00 ]
+Key: BLENDPDrmi:  [ 0.00  0.00 ]
+Key: BLENDPDrri:  [ 0.00  0.00 ]
+Key: BLENDPSrmi:  [ 0.00  0.00 ]
+Key: BLENDPSrri:  [ 0.00  0.00 ]
+Key: BLENDVPDrm:  [ 0.00  0.00 ]
+Key: BLENDVPDrr:  [ 0.00  0.00 ]
+Key: BLENDVPSrm:  [ 0.00  0.00 ]
+Key: BLENDVPSrr:  [ 0.00  0.00 ]
+Key: BLSFILL:  [ 0.00  0.00 ]
+Key: BLSI:  [ 0.00  0.00 ]
+Key: BLSIC:  [ 0.00  0.00 ]
+Key: BLSMSK:  [ 0.00  0.00 ]
+Key: BLSR:  [ 0.00  0.00 ]
+Key: BOUNDS:  [ 0.00  0.00 ]
+Key: BSF:  [ 0.00  0.00 ]
+Key: BSR:  [ 0.00  0.00 ]
+Key: BSWAP:  [ 0.00  0.00 ]
+Key: BT:  [ 0.00  0.00 ]
+Key: BTC:  [ 0.00  0.00 ]
+Key: BTR:  [ 0.00  0.00 ]
+Key: BTS:  [ 0.00  0.00 ]
+Key: BUNDLE:  [ 0.00  0.00 ]
+Key: BZHI:  [ 0.00  0.00 ]
+Key: CALL:  [ 0.00  0.00 ]
+Key: CALLpcrel:  [ 0.00  0.00 ]
+Key: CATCHRET:  [ 0.00  0.00 ]
+Key: CBW:  [ 0.00  0.00 ]
+Key: CCMP:  [ 0.00  0.00 ]
+Key: CDQ:  [ 0.00  0.00 ]
+Key: CDQE:  [ 0.00  0.00 ]
+Key: CFCMOV:  [ 0.00  0.00 ]
+Key: CFI_INSTRUCTION:  [ 0.00  0.00 ]
+Key: CHS_F:  [ 0.00  0.00 ]
+Key: CHS_Fp:  [ 0.00  0.00 ]
+Key: CLAC:  [ 0.00  0.00 ]
+Key: CLC:  [ 0.00  0.00 ]
+Key: CLD:  [ 0.00  0.00 ]
+Key: CLDEMOTE:  [ 0.00  0.00 ]
+Key: CLEANUPRET:  [ 0.00  0.00 ]
+Key: CLFLUSH:  [ 0.00  0.00 ]
+Key: CLFLUSHOPT:  [ 0.00  0.00 ]
+Key: CLGI:  [ 0.00  0.00 ]
+Key: CLI:  [ 0.00  0.00 ]
+Key: CLRSSBSY:  [ 0.00  0.00 ]
+Key: CLTS:  [ 0.00  0.00 ]
+Key: CLUI:  [ 0.00  0.00 ]
+Key: CLWB:  [ 0.00  0.00 ]
+Key: CLZERO:  [ 0.00  0.00 ]
+Key: CMC:  [ 0.00  0.00 ]
+Key: CMOV:  [ 0.00  0.00 ]
+Key: CMOVBE_F:  [ 0.00  0.00 ]
+Key: CMOVBE_Fp:  [ 0.00  0.00 ]
+Key: CMOVB_F:  [ 0.00  0.00 ]
+Key: CMOVB_Fp:  [ 0.00  0.00 ]
+Key: CMOVE_F:  [ 0.00  0.00 ]
+Key: CMOVE_Fp:  [ 0.00  0.00 ]
+Key: CMOVNBE_F:  [ 0.00  0.00 ]
+Key: CMOVNBE_Fp:  [ 0.00  0.00 ]
+Key: CMOVNB_F:  [ 0.00  0.00 ]
+Key: CMOVNB_Fp:  [ 0.00  0.00 ]
+Key: CMOVNE_F:  [ 0.00  0.00 ]
+Key: CMOVNE_Fp:  [ 0.00  0.00 ]
+Key: CMOVNP_F:  [ 0.00  0.00 ]
+Key: CMOVNP_Fp:  [ 0.00  0.00 ]
+Key: CMOVP_F:  [ 0.00  0.00 ]
+Key: CMOVP_Fp:  [ 0.00  0.00 ]
+Key: CMOV_FR:  [ 0.00  0.00 ]
+Key: CMOV_GR:  [ 0.00  0.00 ]
+Key: CMOV_RFP:  [ 0.00  0.00 ]
+Key: CMOV_VK:  [ 0.00  0.00 ]
+Key: CMOV_VR:  [ 0.00  0.00 ]
+Key: CMP:  [ 0.00  0.00 ]
+Key: CMPCCXADDmr:  [ 0.00  0.00 ]
+Key: CMPPDrmi:  [ 0.00  0.00 ]
+Key: CMPPDrri:  [ 0.00  0.00 ]
+Key: CMPPSrmi:  [ 0.00  0.00 ]
+Key: CMPPSrri:  [ 0.00  0.00 ]
+Key: CMPSB:  [ 0.00  0.00 ]
+Key: CMPSDrmi:  [ 0.00  0.00 ]
+Key: CMPSDrmi_Int:  [ 0.00  0.00 ]
+Key: CMPSDrri:  [ 0.00  0.00 ]
+Key: CMPSDrri_Int:  [ 0.00  0.00 ]
+Key: CMPSL:  [ 0.00  0.00 ]
+Key: CMPSQ:  [ 0.00  0.00 ]
+Key: CMPSSrmi:  [ 0.00  0.00 ]
+Key: CMPSSrmi_Int:  [ 0.00  0.00 ]
+Key: CMPSSrri:  [ 0.00  0.00 ]
+Key: CMPSSrri_Int:  [ 0.00  0.00 ]
+Key: CMPSW:  [ 0.00  0.00 ]
+Key: CMPXCHG:  [ 0.00  0.00 ]
+Key: COMISDrm:  [ 0.00  0.00 ]
+Key: COMISDrm_Int:  [ 0.00  0.00 ]
+Key: COMISDrr:  [ 0.00  0.00 ]
+Key: COMISDrr_Int:  [ 0.00  0.00 ]
+Key: COMISSrm:  [ 0.00  0.00 ]
+Key: COMISSrm_Int:  [ 0.00  0.00 ]
+Key: COMISSrr:  [ 0.00  0.00 ]
+Key: COMISSrr_Int:  [ 0.00  0.00 ]
+Key: COMP_FST:  [ 0.00  0.00 ]
+Key: COM_FIPr:  [ 0.00  0.00 ]
+Key: COM_FIr:  [ 0.00  0.00 ]
+Key: COM_FST:  [ 0.00  0.00 ]
+Key: COM_FpIr:  [ 0.00  0.00 ]
+Key: COM_Fpr:  [ 0.00  0.00 ]
+Key: CONVERGENCECTRL_ANCHOR:  [ 0.00  0.00 ]
+Key: CONVERGENCECTRL_ENTRY:  [ 0.00  0.00 ]
+Key: CONVERGENCECTRL_GLUE:  [ 0.00  0.00 ]
+Key: CONVERGENCECTRL_LOOP:  [ 0.00  0.00 ]
+Key: COPY:  [ 0.00  0.00 ]
+Key: COPY_TO_REGCLASS:  [ 0.00  0.00 ]
+Key: CPUID:  [ 0.00  0.00 ]
+Key: CQO:  [ 0.00  0.00 ]
+Key: CRC:  [ 0.00  0.00 ]
+Key: CS_PREFIX:  [ 0.00  0.00 ]
+Key: CTEST:  [ 0.00  0.00 ]
+Key: CVTDQ:  [ 0.00  0.00 ]
+Key: CVTPD:  [ 0.00  0.00 ]
+Key: CVTPS:  [ 0.00  0.00 ]
+Key: CVTSD:  [ 0.00  0.00 ]
+Key: CVTSI:  [ 0.00  0.00 ]
+Key: CVTSS:  [ 0.00  0.00 ]
+Key: CVTTPD:  [ 0.00  0.00 ]
+Key: CVTTPS:  [ 0.00  0.00 ]
+Key: CVTTSD:  [ 0.00  0.00 ]
+Key: CVTTSS:  [ 0.00  0.00 ]
+Key: CWD:  [ 0.00  0.00 ]
+Key: CWDE:  [ 0.00  0.00 ]
+Key: DAA:  [ 0.00  0.00 ]
+Key: DAS:  [ 0.00  0.00 ]
+Key: DATA:  [ 0.00  0.00 ]
+Key: DBG_INSTR_REF:  [ 0.00  0.00 ]
+Key: DBG_LABEL:  [ 0.00  0.00 ]
+Key: DBG_PHI:  [ 0.00  0.00 ]
+Key: DBG_VALUE:  [ 0.00  0.00 ]
+Key: DBG_VALUE_LIST:  [ 0.00  0.00 ]
+Key: DEC:  [ 0.00  0.00 ]
+Key: DIV:  [ 0.00  0.00 ]
+Key: DIVPDrm:  [ 0.00  0.00 ]
+Key: DIVPDrr:  [ 0.00  0.00 ]
+Key: DIVPSrm:  [ 0.00  0.00 ]
+Key: DIVPSrr:  [ 0.00  0.00 ]
+Key: DIVR_F:  [ 0.00  0.00 ]
+Key: DIVR_FI:  [ 0.00  0.00 ]
+Key: DIVR_FPrST:  [ 0.00  0.00 ]
+Key: DIVR_FST:  [ 0.00  0.00 ]
+Key: DIVR_Fp:  [ 0.00  0.00 ]
+Key: DIVR_FpI:  [ 0.00  0.00 ]
+Key: DIVR_FrST:  [ 0.00  0.00 ]
+Key: DIVSDrm:  [ 0.00  0.00 ]
+Key: DIVSDrm_Int:  [ 0.00  0.00 ]
+Key: DIVSDrr:  [ 0.00  0.00 ]
+Key: DIVSDrr_Int:  [ 0.00  0.00 ]
+Key: DIVSSrm:  [ 0.00  0.00 ]
+Key: DIVSSrm_Int:  [ 0.00  0.00 ]
+Key: DIVSSrr:  [ 0.00  0.00 ]
+Key: DIVSSrr_Int:  [ 0.00  0.00 ]
+Key: DIV_F:  [ 0.00  0.00 ]
+Key: DIV_FI:  [ 0.00  0.00 ]
+Key: DIV_FPrST:  [ 0.00  0.00 ]
+Key: DIV_FST:  [ 0.00  0.00 ]
+Key: DIV_Fp:  [ 0.00  0.00 ]
+Key: DIV_FpI:  [ 0.00  0.00 ]
+Key: DIV_FrST:  [ 0.00  0.00 ]
+Key: DPPDrmi:  [ 0.00  0.00 ]
+Key: DPPDrri:  [ 0.00  0.00 ]
+Key: DPPSrmi:  [ 0.00  0.00 ]
+Key: DPPSrri:  [ 0.00  0.00 ]
+Key: DS_PREFIX:  [ 0.00  0.00 ]
+Key: DYN_ALLOCA:  [ 0.00  0.00 ]
+Key: EH_LABEL:  [ 0.00  0.00 ]
+Key: EH_RETURN:  [ 0.00  0.00 ]
+Key: EH_SjLj_LongJmp:  [ 0.00  0.00 ]
+Key: EH_SjLj_SetJmp:  [ 0.00  0.00 ]
+Key: EH_SjLj_Setup:  [ 0.00  0.00 ]
+Key: ENCLS:  [ 0.00  0.00 ]
+Key: ENCLU:  [ 0.00  0.00 ]
+Key: ENCLV:  [ 0.00  0.00 ]
+Key: ENCODEKEY:  [ 0.00  0.00 ]
+Key: ENDBR:  [ 0.00  0.00 ]
+Key: ENQCMD:  [ 0.00  0.00 ]
+Key: ENQCMDS:  [ 0.00  0.00 ]
+Key: ENTER:  [ 0.00  0.00 ]
+Key: ERETS:  [ 0.00  0.00 ]
+Key: ERETU:  [ 0.00  0.00 ]
+Key: ES_PREFIX:  [ 0.00  0.00 ]
+Key: EXTRACTPSmri:  [ 0.00  0.00 ]
+Key: EXTRACTPSrri:  [ 0.00  0.00 ]
+Key: EXTRACT_SUBREG:  [ 0.00  0.00 ]
+Key: EXTRQ:  [ 0.00  0.00 ]
+Key: EXTRQI:  [ 0.00  0.00 ]
+Key: F:  [ 0.00  0.00 ]
+Key: FAKE_USE:  [ 0.00  0.00 ]
+Key: FARCALL:  [ 0.00  0.00 ]
+Key: FARJMP:  [ 0.00  0.00 ]
+Key: FAULTING_OP:  [ 0.00  0.00 ]
+Key: FBLDm:  [ 0.00  0.00 ]
+Key: FBSTPm:  [ 0.00  0.00 ]
+Key: FCOM:  [ 0.00  0.00 ]
+Key: FCOMP:  [ 0.00  0.00 ]
+Key: FCOMPP:  [ 0.00  0.00 ]
+Key: FCOS:  [ 0.00  0.00 ]
+Key: FDECSTP:  [ 0.00  0.00 ]
+Key: FEMMS:  [ 0.00  0.00 ]
+Key: FENTRY_CALL:  [ 0.00  0.00 ]
+Key: FFREE:  [ 0.00  0.00 ]
+Key: FFREEP:  [ 0.00  0.00 ]
+Key: FICOM:  [ 0.00  0.00 ]
+Key: FICOMP:  [ 0.00  0.00 ]
+Key: FINCSTP:  [ 0.00  0.00 ]
+Key: FLDCW:  [ 0.00  0.00 ]
+Key: FLDENVm:  [ 0.00  0.00 ]
+Key: FLDL:  [ 0.00  0.00 ]
+Key: FLDLG:  [ 0.00  0.00 ]
+Key: FLDLN:  [ 0.00  0.00 ]
+Key: FLDPI:  [ 0.00  0.00 ]
+Key: FNCLEX:  [ 0.00  0.00 ]
+Key: FNINIT:  [ 0.00  0.00 ]
+Key: FNOP:  [ 0.00  0.00 ]
+Key: FNSTCW:  [ 0.00  0.00 ]
+Key: FNSTSW:  [ 0.00  0.00 ]
+Key: FNSTSWm:  [ 0.00  0.00 ]
+Key: FP:  [ 0.00  0.00 ]
+Key: FPATAN:  [ 0.00  0.00 ]
+Key: FPREM:  [ 0.00  0.00 ]
+Key: FPTAN:  [ 0.00  0.00 ]
+Key: FRNDINT:  [ 0.00  0.00 ]
+Key: FRSTORm:  [ 0.00  0.00 ]
+Key: FSAVEm:  [ 0.00  0.00 ]
+Key: FSCALE:  [ 0.00  0.00 ]
+Key: FSIN:  [ 0.00  0.00 ]
+Key: FSINCOS:  [ 0.00  0.00 ]
+Key: FSTENVm:  [ 0.00  0.00 ]
+Key: FS_PREFIX:  [ 0.00  0.00 ]
+Key: FXRSTOR:  [ 0.00  0.00 ]
+Key: FXSAVE:  [ 0.00  0.00 ]
+Key: FXTRACT:  [ 0.00  0.00 ]
+Key: FYL:  [ 0.00  0.00 ]
+Key: FsFLD:  [ 0.00  0.00 ]
+Key: GC_LABEL:  [ 0.00  0.00 ]
+Key: GETSEC:  [ 0.00  0.00 ]
+Key: GF:  [ 0.00  0.00 ]
+Key: GS_PREFIX:  [ 0.00  0.00 ]
+Key: G_ABDS:  [ 0.00  0.00 ]
+Key: G_ABDU:  [ 0.00  0.00 ]
+Key: G_ABS:  [ 0.00  0.00 ]
+Key: G_ADD:  [ 0.00  0.00 ]
+Key: G_ADDRSPACE_CAST:  [ 0.00  0.00 ]
+Key: G_AND:  [ 0.00  0.00 ]
+Key: G_ANYEXT:  [ 0.00  0.00 ]
+Key: G_ASHR:  [ 0.00  0.00 ]
+Key: G_ASSERT_ALIGN:  [ 0.00  0.00 ]
+Key: G_ASSERT_SEXT:  [ 0.00  0.00 ]
+Key: G_ASSERT_ZEXT:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_ADD:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_AND:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_FADD:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_FMAX:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_FMAXIMUM:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_FMIN:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_FMINIMUM:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_FSUB:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_MAX:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_MIN:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_NAND:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_OR:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_SUB:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_UDEC_WRAP:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_UINC_WRAP:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_UMAX:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_UMIN:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_USUB_COND:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_USUB_SAT:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_XCHG:  [ 0.00  0.00 ]
+Key: G_ATOMICRMW_XOR:  [ 0.00  0.00 ]
+Key: G_ATOMIC_CMPXCHG:  [ 0.00  0.00 ]
+Key: G_ATOMIC_CMPXCHG_WITH_SUCCESS:  [ 0.00  0.00 ]
+Key: G_BITCAST:  [ 0.00  0.00 ]
+Key: G_BITREVERSE:  [ 0.00  0.00 ]
+Key: G_BLOCK_ADDR:  [ 0.00  0.00 ]
+Key: G_BR:  [ 0.00  0.00 ]
+Key: G_BRCOND:  [ 0.00  0.00 ]
+Key: G_BRINDIRECT:  [ 0.00  0.00 ]
+Key: G_BRJT:  [ 0.00  0.00 ]
+Key: G_BSWAP:  [ 0.00  0.00 ]
+Key: G_BUILD_VECTOR:  [ 0.00  0.00 ]
+Key: G_BUILD_VECTOR_TRUNC:  [ 0.00  0.00 ]
+Key: G_BZERO:  [ 0.00  0.00 ]
+Key: G_CONCAT_VECTORS:  [ 0.00  0.00 ]
+Key: G_CONSTANT:  [ 0.00  0.00 ]
+Key: G_CONSTANT_FOLD_BARRIER:  [ 0.00  0.00 ]
+Key: G_CONSTANT_POOL:  [ 0.00  0.00 ]
+Key: G_CTLZ:  [ 0.00  0.00 ]
+Key: G_CTLZ_ZERO_UNDEF:  [ 0.00  0.00 ]
+Key: G_CTPOP:  [ 0.00  0.00 ]
+Key: G_CTTZ:  [ 0.00  0.00 ]
+Key: G_CTTZ_ZERO_UNDEF:  [ 0.00  0.00 ]
+Key: G_DEBUGTRAP:  [ 0.00  0.00 ]
+Key: G_DYN_STACKALLOC:  [ 0.00  0.00 ]
+Key: G_EXTRACT:  [ 0.00  0.00 ]
+Key: G_EXTRACT_SUBVECTOR:  [ 0.00  0.00 ]
+Key: G_EXTRACT_VECTOR_ELT:  [ 0.00  0.00 ]
+Key: G_FABS:  [ 0.00  0.00 ]
+Key: G_FACOS:  [ 0.00  0.00 ]
+Key: G_FADD:  [ 0.00  0.00 ]
+Key: G_FASIN:  [ 0.00  0.00 ]
+Key: G_FATAN:  [ 0.00  0.00 ]
+Key: G_FCANONICALIZE:  [ 0.00  0.00 ]
+Key: G_FCEIL:  [ 0.00  0.00 ]
+Key: G_FCMP:  [ 0.00  0.00 ]
+Key: G_FCONSTANT:  [ 0.00  0.00 ]
+Key: G_FCOPYSIGN:  [ 0.00  0.00 ]
+Key: G_FCOS:  [ 0.00  0.00 ]
+Key: G_FCOSH:  [ 0.00  0.00 ]
+Key: G_FDIV:  [ 0.00  0.00 ]
+Key: G_FENCE:  [ 0.00  0.00 ]
+Key: G_FEXP:  [ 0.00  0.00 ]
+Key: G_FFLOOR:  [ 0.00  0.00 ]
+Key: G_FFREXP:  [ 0.00  0.00 ]
+Key: G_FILD:  [ 0.00  0.00 ]
+Key: G_FIST:  [ 0.00  0.00 ]
+Key: G_FLDCW:  [ 0.00  0.00 ]
+Key: G_FLDEXP:  [ 0.00  0.00 ]
+Key: G_FLOG:  [ 0.00  0.00 ]
+Key: G_FMA:  [ 0.00  0.00 ]
+Key: G_FMAD:  [ 0.00  0.00 ]
+Key: G_FMAXIMUM:  [ 0.00  0.00 ]
+Key: G_FMAXIMUMNUM:  [ 0.00  0.00 ]
+Key: G_FMAXNUM:  [ 0.00  0.00 ]
+Key: G_FMAXNUM_IEEE:  [ 0.00  0.00 ]
+Key: G_FMINIMUM:  [ 0.00  0.00 ]
+Key: G_FMINIMUMNUM:  [ 0.00  0.00 ]
+Key: G_FMINNUM:  [ 0.00  0.00 ]
+Key: G_FMINNUM_IEEE:  [ 0.00  0.00 ]
+Key: G_FMODF:  [ 0.00  0.00 ]
+Key: G_FMUL:  [ 0.00  0.00 ]
+Key: G_FNEARBYINT:  [ 0.00  0.00 ]
+Key: G_FNEG:  [ 0.00  0.00 ]
+Key: G_FNSTCW:  [ 0.00  0.00 ]
+Key: G_FPEXT:  [ 0.00  0.00 ]
+Key: G_FPOW:  [ 0.00  0.00 ]
+Key: G_FPOWI:  [ 0.00  0.00 ]
+Key: G_FPTOSI:  [ 0.00  0.00 ]
+Key: G_FPTOSI_SAT:  [ 0.00  0.00 ]
+Key: G_FPTOUI:  [ 0.00  0.00 ]
+Key: G_FPTOUI_SAT:  [ 0.00  0.00 ]
+Key: G_FPTRUNC:  [ 0.00  0.00 ]
+Key: G_FRAME_INDEX:  [ 0.00  0.00 ]
+Key: G_FREEZE:  [ 0.00  0.00 ]
+Key: G_FREM:  [ 0.00  0.00 ]
+Key: G_FRINT:  [ 0.00  0.00 ]
+Key: G_FSHL:  [ 0.00  0.00 ]
+Key: G_FSHR:  [ 0.00  0.00 ]
+Key: G_FSIN:  [ 0.00  0.00 ]
+Key: G_FSINCOS:  [ 0.00  0.00 ]
+Key: G_FSINH:  [ 0.00  0.00 ]
+Key: G_FSQRT:  [ 0.00  0.00 ]
+Key: G_FSUB:  [ 0.00  0.00 ]
+Key: G_FTAN:  [ 0.00  0.00 ]
+Key: G_FTANH:  [ 0.00  0.00 ]
+Key: G_GET_FPENV:  [ 0.00  0.00 ]
+Key: G_GET_FPMODE:  [ 0.00  0.00 ]
+Key: G_GET_ROUNDING:  [ 0.00  0.00 ]
+Key: G_GLOBAL_VALUE:  [ 0.00  0.00 ]
+Key: G_ICMP:  [ 0.00  0.00 ]
+Key: G_IMPLICIT_DEF:  [ 0.00  0.00 ]
+Key: G_INDEXED_LOAD:  [ 0.00  0.00 ]
+Key: G_INDEXED_SEXTLOAD:  [ 0.00  0.00 ]
+Key: G_INDEXED_STORE:  [ 0.00  0.00 ]
+Key: G_INDEXED_ZEXTLOAD:  [ 0.00  0.00 ]
+Key: G_INSERT:  [ 0.00  0.00 ]
+Key: G_INSERT_SUBVECTOR:  [ 0.00  0.00 ]
+Key: G_INSERT_VECTOR_ELT:  [ 0.00  0.00 ]
+Key: G_INTRINSIC:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_CONVERGENT:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_FPTRUNC_ROUND:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_LLRINT:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_LRINT:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_ROUND:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_ROUNDEVEN:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_TRUNC:  [ 0.00  0.00 ]
+Key: G_INTRINSIC_W_SIDE_EFFECTS:  [ 0.00  0.00 ]
+Key: G_INTTOPTR:  [ 0.00  0.00 ]
+Key: G_INVOKE_REGION_START:  [ 0.00  0.00 ]
+Key: G_IS_FPCLASS:  [ 0.00  0.00 ]
+Key: G_JUMP_TABLE:  [ 0.00  0.00 ]
+Key: G_LLROUND:  [ 0.00  0.00 ]
+Key: G_LOAD:  [ 0.00  0.00 ]
+Key: G_LROUND:  [ 0.00  0.00 ]
+Key: G_LSHR:  [ 0.00  0.00 ]
+Key: G_MEMCPY:  [ 0.00  0.00 ]
+Key: G_MEMCPY_INLINE:  [ 0.00  0.00 ]
+Key: G_MEMMOVE:  [ 0.00  0.00 ]
+Key: G_MEMSET:  [ 0.00  0.00 ]
+Key: G_MERGE_VALUES:  [ 0.00  0.00 ]
+Key: G_MUL:  [ 0.00  0.00 ]
+Key: G_OR:  [ 0.00  0.00 ]
+Key: G_PHI:  [ 0.00  0.00 ]
+Key: G_PREFETCH:  [ 0.00  0.00 ]
+Key: G_PTRAUTH_GLOBAL_VALUE:  [ 0.00  0.00 ]
+Key: G_PTRMASK:  [ 0.00  0.00 ]
+Key: G_PTRTOINT:  [ 0.00  0.00 ]
+Key: G_PTR_ADD:  [ 0.00  0.00 ]
+Key: G_READCYCLECOUNTER:  [ 0.00  0.00 ]
+Key: G_READSTEADYCOUNTER:  [ 0.00  0.00 ]
+Key: G_READ_REGISTER:  [ 0.00  0.00 ]
+Key: G_RESET_FPENV:  [ 0.00  0.00 ]
+Key: G_RESET_FPMODE:  [ 0.00  0.00 ]
+Key: G_ROTL:  [ 0.00  0.00 ]
+Key: G_ROTR:  [ 0.00  0.00 ]
+Key: G_SADDE:  [ 0.00  0.00 ]
+Key: G_SADDO:  [ 0.00  0.00 ]
+Key: G_SADDSAT:  [ 0.00  0.00 ]
+Key: G_SBFX:  [ 0.00  0.00 ]
+Key: G_SCMP:  [ 0.00  0.00 ]
+Key: G_SDIV:  [ 0.00  0.00 ]
+Key: G_SDIVFIX:  [ 0.00  0.00 ]
+Key: G_SDIVFIXSAT:  [ 0.00  0.00 ]
+Key: G_SDIVREM:  [ 0.00  0.00 ]
+Key: G_SELECT:  [ 0.00  0.00 ]
+Key: G_SET_FPENV:  [ 0.00  0.00 ]
+Key: G_SET_FPMODE:  [ 0.00  0.00 ]
+Key: G_SET_ROUNDING:  [ 0.00  0.00 ]
+Key: G_SEXT:  [ 0.00  0.00 ]
+Key: G_SEXTLOAD:  [ 0.00  0.00 ]
+Key: G_SEXT_INREG:  [ 0.00  0.00 ]
+Key: G_SHL:  [ 0.00  0.00 ]
+Key: G_SHUFFLE_VECTOR:  [ 0.00  0.00 ]
+Key: G_SITOFP:  [ 0.00  0.00 ]
+Key: G_SMAX:  [ 0.00  0.00 ]
+Key: G_SMIN:  [ 0.00  0.00 ]
+Key: G_SMULFIX:  [ 0.00  0.00 ]
+Key: G_SMULFIXSAT:  [ 0.00  0.00 ]
+Key: G_SMULH:  [ 0.00  0.00 ]
+Key: G_SMULO:  [ 0.00  0.00 ]
+Key: G_SPLAT_VECTOR:  [ 0.00  0.00 ]
+Key: G_SREM:  [ 0.00  0.00 ]
+Key: G_SSHLSAT:  [ 0.00  0.00 ]
+Key: G_SSUBE:  [ 0.00  0.00 ]
+Key: G_SSUBO:  [ 0.00  0.00 ]
+Key: G_SSUBSAT:  [ 0.00  0.00 ]
+Key: G_STACKRESTORE:  [ 0.00  0.00 ]
+Key: G_STACKSAVE:  [ 0.00  0.00 ]
+Key: G_STEP_VECTOR:  [ 0.00  0.00 ]
+Key: G_STORE:  [ 0.00  0.00 ]
+Key: G_STRICT_FADD:  [ 0.00  0.00 ]
+Key: G_STRICT_FDIV:  [ 0.00  0.00 ]
+Key: G_STRICT_FLDEXP:  [ 0.00  0.00 ]
+Key: G_STRICT_FMA:  [ 0.00  0.00 ]
+Key: G_STRICT_FMUL:  [ 0.00  0.00 ]
+Key: G_STRICT_FREM:  [ 0.00  0.00 ]
+Key: G_STRICT_FSQRT:  [ 0.00  0.00 ]
+Key: G_STRICT_FSUB:  [ 0.00  0.00 ]
+Key: G_SUB:  [ 0.00  0.00 ]
+Key: G_TRAP:  [ 0.00  0.00 ]
+Key: G_TRUNC:  [ 0.00  0.00 ]
+Key: G_TRUNC_SSAT_S:  [ 0.00  0.00 ]
+Key: G_TRUNC_SSAT_U:  [ 0.00  0.00 ]
+Key: G_TRUNC_USAT_U:  [ 0.00  0.00 ]
+Key: G_UADDE:  [ 0.00  0.00 ]
+Key: G_UADDO:  [ 0.00  0.00 ]
+Key: G_UADDSAT:  [ 0.00  0.00 ]
+Key: G_UBFX:  [ 0.00  0.00 ]
+Key: G_UBSANTRAP:  [ 0.00  0.00 ]
+Key: G_UCMP:  [ 0.00  0.00 ]
+Key: G_UDIV:  [ 0.00  0.00 ]
+Key: G_UDIVFIX:  [ 0.00  0.00 ]
+Key: G_UDIVFIXSAT:  [ 0.00  0.00 ]
+Key: G_UDIVREM:  [ 0.00  0.00 ]
+Key: G_UITOFP:  [ 0.00  0.00 ]
+Key: G_UMAX:  [ 0.00  0.00 ]
+Key: G_UMIN:  [ 0.00  0.00 ]
+Key: G_UMULFIX:  [ 0.00  0.00 ]
+Key: G_UMULFIXSAT:  [ 0.00  0.00 ]
+Key: G_UMULH:  [ 0.00  0.00 ]
+Key: G_UMULO:  [ 0.00  0.00 ]
+Key: G_UNMERGE_VALUES:  [ 0.00  0.00 ]
+Key: G_UREM:  [ 0.00  0.00 ]
+Key: G_USHLSAT:  [ 0.00  0.00 ]
+Key: G_USUBE:  [ 0.00  0.00 ]
+Key: G_USUBO:  [ 0.00  0.00 ]
+Key: G_USUBSAT:  [ 0.00  0.00 ]
+Key: G_VAARG:  [ 0.00  0.00 ]
+Key: G_VASTART:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_ADD:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_AND:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_FADD:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_FMAX:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_FMAXIMUM:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_FMIN:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_FMINIMUM:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_FMUL:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_MUL:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_OR:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_SEQ_FADD:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_SEQ_FMUL:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_SMAX:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_SMIN:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_UMAX:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_UMIN:  [ 0.00  0.00 ]
+Key: G_VECREDUCE_XOR:  [ 0.00  0.00 ]
+Key: G_VECTOR_COMPRESS:  [ 0.00  0.00 ]
+Key: G_VSCALE:  [ 0.00  0.00 ]
+Key: G_WRITE_REGISTER:  [ 0.00  0.00 ]
+Key: G_XOR:  [ 0.00  0.00 ]
+Key: G_ZEXT:  [ 0.00  0.00 ]
+Key: G_ZEXTLOAD:  [ 0.00  0.00 ]
+Key: HADDPDrm:  [ 0.00  0.00 ]
+Key: HADDPDrr:  [ 0.00  0.00 ]
+Key: HADDPSrm:  [ 0.00  0.00 ]
+Key: HADDPSrr:  [ 0.00  0.00 ]
+Key: HLT:  [ 0.00  0.00 ]
+Key: HRESET:  [ 0.00  0.00 ]
+Key: HSUBPDrm:  [ 0.00  0.00 ]
+Key: HSUBPDrr:  [ 0.00  0.00 ]
+Key: HSUBPSrm:  [ 0.00  0.00 ]
+Key: HSUBPSrr:  [ 0.00  0.00 ]
+Key: ICALL_BRANCH_FUNNEL:  [ 0.00  0.00 ]
+Key: IDIV:  [ 0.00  0.00 ]
+Key: ILD_F:  [ 0.00  0.00 ]
+Key: ILD_Fp:  [ 0.00  0.00 ]
+Key: IMPLICIT_DEF:  [ 0.00  0.00 ]
+Key: IMUL:  [ 0.00  0.00 ]
+Key: IMULZU:  [ 0.00  0.00 ]
+Key: IN:  [ 0.00  0.00 ]
+Key: INC:  [ 0.00  0.00 ]
+Key: INCSSPD:  [ 0.00  0.00 ]
+Key: INCSSPQ:  [ 0.00  0.00 ]
+Key: INDIRECT_THUNK_CALL:  [ 0.00  0.00 ]
+Key: INDIRECT_THUNK_TCRETURN:  [ 0.00  0.00 ]
+Key: INIT_UNDEF:  [ 0.00  0.00 ]
+Key: INLINEASM:  [ 0.00  0.00 ]
+Key: INLINEASM_BR:  [ 0.00  0.00 ]
+Key: INSB:  [ 0.00  0.00 ]
+Key: INSERTPSrmi:  [ 0.00  0.00 ]
+Key: INSERTPSrri:  [ 0.00  0.00 ]
+Key: INSERTQ:  [ 0.00  0.00 ]
+Key: INSERTQI:  [ 0.00  0.00 ]
+Key: INSERT_SUBREG:  [ 0.00  0.00 ]
+Key: INSL:  [ 0.00  0.00 ]
+Key: INSW:  [ 0.00  0.00 ]
+Key: INT:  [ 0.00  0.00 ]
+Key: INTO:  [ 0.00  0.00 ]
+Key: INVD:  [ 0.00  0.00 ]
+Key: INVEPT:  [ 0.00  0.00 ]
+Key: INVLPG:  [ 0.00  0.00 ]
+Key: INVLPGA:  [ 0.00  0.00 ]
+Key: INVLPGB:  [ 0.00  0.00 ]
+Key: INVPCID:  [ 0.00  0.00 ]
+Key: INVVPID:  [ 0.00  0.00 ]
+Key: IRET:  [ 0.00  0.00 ]
+Key: ISTT_FP:  [ 0.00  0.00 ]
+Key: ISTT_Fp:  [ 0.00  0.00 ]
+Key: IST_F:  [ 0.00  0.00 ]
+Key: IST_FP:  [ 0.00  0.00 ]
+Key: IST_Fp:  [ 0.00  0.00 ]
+Key: Int_eh_sjlj_setup_dispatch:  [ 0.00  0.00 ]
+Key: JCC:  [ 0.00  0.00 ]
+Key: JCXZ:  [ 0.00  0.00 ]
+Key: JECXZ:  [ 0.00  0.00 ]
+Key: JMP:  [ 0.00  0.00 ]
+Key: JMPABS:  [ 0.00  0.00 ]
+Key: JRCXZ:  [ 0.00  0.00 ]
+Key: JUMP_TABLE_DEBUG_INFO:  [ 0.00  0.00 ]
+Key: KADDBkk:  [ 0.00  0.00 ]
+Key: KADDDkk:  [ 0.00  0.00 ]
+Key: KADDQkk:  [ 0.00  0.00 ]
+Key: KADDWkk:  [ 0.00  0.00 ]
+Key: KANDBkk:  [ 0.00  0.00 ]
+Key: KANDDkk:  [ 0.00  0.00 ]
+Key: KANDNBkk:  [ 0.00  0.00 ]
+Key: KANDNDkk:  [ 0.00  0.00 ]
+Key: KANDNQkk:  [ 0.00  0.00 ]
+Key: KANDNWkk:  [ 0.00  0.00 ]
+Key: KANDQkk:  [ 0.00  0.00 ]
+Key: KANDWkk:  [ 0.00  0.00 ]
+Key: KCFI_CHECK:  [ 0.00  0.00 ]
+Key: KILL:  [ 0.00  0.00 ]
+Key: KMOVBkk:  [ 0.00  0.00 ]
+Key: KMOVBkk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVBkm:  [ 0.00  0.00 ]
+Key: KMOVBkm_EVEX:  [ 0.00  0.00 ]
+Key: KMOVBkr:  [ 0.00  0.00 ]
+Key: KMOVBkr_EVEX:  [ 0.00  0.00 ]
+Key: KMOVBmk:  [ 0.00  0.00 ]
+Key: KMOVBmk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVBrk:  [ 0.00  0.00 ]
+Key: KMOVBrk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVDkk:  [ 0.00  0.00 ]
+Key: KMOVDkk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVDkm:  [ 0.00  0.00 ]
+Key: KMOVDkm_EVEX:  [ 0.00  0.00 ]
+Key: KMOVDkr:  [ 0.00  0.00 ]
+Key: KMOVDkr_EVEX:  [ 0.00  0.00 ]
+Key: KMOVDmk:  [ 0.00  0.00 ]
+Key: KMOVDmk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVDrk:  [ 0.00  0.00 ]
+Key: KMOVDrk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVQkk:  [ 0.00  0.00 ]
+Key: KMOVQkk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVQkm:  [ 0.00  0.00 ]
+Key: KMOVQkm_EVEX:  [ 0.00  0.00 ]
+Key: KMOVQkr:  [ 0.00  0.00 ]
+Key: KMOVQkr_EVEX:  [ 0.00  0.00 ]
+Key: KMOVQmk:  [ 0.00  0.00 ]
+Key: KMOVQmk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVQrk:  [ 0.00  0.00 ]
+Key: KMOVQrk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVWkk:  [ 0.00  0.00 ]
+Key: KMOVWkk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVWkm:  [ 0.00  0.00 ]
+Key: KMOVWkm_EVEX:  [ 0.00  0.00 ]
+Key: KMOVWkr:  [ 0.00  0.00 ]
+Key: KMOVWkr_EVEX:  [ 0.00  0.00 ]
+Key: KMOVWmk:  [ 0.00  0.00 ]
+Key: KMOVWmk_EVEX:  [ 0.00  0.00 ]
+Key: KMOVWrk:  [ 0.00  0.00 ]
+Key: KMOVWrk_EVEX:  [ 0.00  0.00 ]
+Key: KNOTBkk:  [ 0.00  0.00 ]
+Key: KNOTDkk:  [ 0.00  0.00 ]
+Key: KNOTQkk:  [ 0.00  0.00 ]
+Key: KNOTWkk:  [ 0.00  0.00 ]
+Key: KORBkk:  [ 0.00  0.00 ]
+Key: KORDkk:  [ 0.00  0.00 ]
+Key: KORQkk:  [ 0.00  0.00 ]
+Key: KORTESTBkk:  [ 0.00  0.00 ]
+Key: KORTESTDkk:  [ 0.00  0.00 ]
+Key: KORTESTQkk:  [ 0.00  0.00 ]
+Key: KORTESTWkk:  [ 0.00  0.00 ]
+Key: KORWkk:  [ 0.00  0.00 ]
+Key: KSET:  [ 0.00  0.00 ]
+Key: KSHIFTLBki:  [ 0.00  0.00 ]
+Key: KSHIFTLDki:  [ 0.00  0.00 ]
+Key: KSHIFTLQki:  [ 0.00  0.00 ]
+Key: KSHIFTLWki:  [ 0.00  0.00 ]
+Key: KSHIFTRBki:  [ 0.00  0.00 ]
+Key: KSHIFTRDki:  [ 0.00  0.00 ]
+Key: KSHIFTRQki:  [ 0.00  0.00 ]
+Key: KSHIFTRWki:  [ 0.00  0.00 ]
+Key: KTESTBkk:  [ 0.00  0.00 ]
+Key: KTESTDkk:  [ 0.00  0.00 ]
+Key: KTESTQkk:  [ 0.00  0.00 ]
+Key: KTESTWkk:  [ 0.00  0.00 ]
+Key: KUNPCKBWkk:  [ 0.00  0.00 ]
+Key: KUNPCKDQkk:  [ 0.00  0.00 ]
+Key: KUNPCKWDkk:  [ 0.00  0.00 ]
+Key: KXNORBkk:  [ 0.00  0.00 ]
+Key: KXNORDkk:  [ 0.00  0.00 ]
+Key: KXNORQkk:  [ 0.00  0.00 ]
+Key: KXNORWkk:  [ 0.00  0.00 ]
+Key: KXORBkk:  [ 0.00  0.00 ]
+Key: KXORDkk:  [ 0.00  0.00 ]
+Key: KXORQkk:  [ 0.00  0.00 ]
+Key: KXORWkk:  [ 0.00  0.00 ]
+Key: LAHF:  [ 0.00  0.00 ]
+Key: LAR:  [ 0.00  0.00 ]
+Key: LCMPXCHG:  [ 0.00  0.00 ]
+Key: LDDQUrm:  [ 0.00  0.00 ]
+Key: LDMXCSR:  [ 0.00  0.00 ]
+Key: LDS:  [ 0.00  0.00 ]
+Key: LDTILECFG:  [ 0.00  0.00 ]
+Key: LDTILECFG_EVEX:  [ 0.00  0.00 ]
+Key: LD_F:  [ 0.00  0.00 ]
+Key: LD_Fp:  [ 0.00  0.00 ]
+Key: LD_Frr:  [ 0.00  0.00 ]
+Key: LEA:  [ 0.00  0.00 ]
+Key: LEAVE:  [ 0.00  0.00 ]
+Key: LES:  [ 0.00  0.00 ]
+Key: LFENCE:  [ 0.00  0.00 ]
+Key: LFS:  [ 0.00  0.00 ]
+Key: LGDT:  [ 0.00  0.00 ]
+Key: LGS:  [ 0.00  0.00 ]
+Key: LIDT:  [ 0.00  0.00 ]
+Key: LIFETIME_END:  [ 0.00  0.00 ]
+Key: LIFETIME_START:  [ 0.00  0.00 ]
+Key: LKGS:  [ 0.00  0.00 ]
+Key: LLDT:  [ 0.00  0.00 ]
+Key: LLWPCB:  [ 0.00  0.00 ]
+Key: LMSW:  [ 0.00  0.00 ]
+Key: LOADIWKEY:  [ 0.00  0.00 ]
+Key: LOAD_STACK_GUARD:  [ 0.00  0.00 ]
+Key: LOCAL_ESCAPE:  [ 0.00  0.00 ]
+Key: LOCK_ADD:  [ 0.00  0.00 ]
+Key: LOCK_AND:  [ 0.00  0.00 ]
+Key: LOCK_BTC:  [ 0.00  0.00 ]
+Key: LOCK_BTC_RM:  [ 0.00  0.00 ]
+Key: LOCK_BTR:  [ 0.00  0.00 ]
+Key: LOCK_BTR_RM:  [ 0.00  0.00 ]
+Key: LOCK_BTS:  [ 0.00  0.00 ]
+Key: LOCK_BTS_RM:  [ 0.00  0.00 ]
+Key: LOCK_DEC:  [ 0.00  0.00 ]
+Key: LOCK_INC:  [ 0.00  0.00 ]
+Key: LOCK_OR:  [ 0.00  0.00 ]
+Key: LOCK_PREFIX:  [ 0.00  0.00 ]
+Key: LOCK_SUB:  [ 0.00  0.00 ]
+Key: LOCK_XOR:  [ 0.00  0.00 ]
+Key: LODSB:  [ 0.00  0.00 ]
+Key: LODSL:  [ 0.00  0.00 ]
+Key: LODSQ:  [ 0.00  0.00 ]
+Key: LODSW:  [ 0.00  0.00 ]
+Key: LOOP:  [ 0.00  0.00 ]
+Key: LOOPE:  [ 0.00  0.00 ]
+Key: LOOPNE:  [ 0.00  0.00 ]
+Key: LRET:  [ 0.00  0.00 ]
+Key: LRETI:  [ 0.00  0.00 ]
+Key: LSL:  [ 0.00  0.00 ]
+Key: LSS:  [ 0.00  0.00 ]
+Key: LTRm:  [ 0.00  0.00 ]
+Key: LTRr:  [ 0.00  0.00 ]
+Key: LWPINS:  [ 0.00  0.00 ]
+Key: LWPVAL:  [ 0.00  0.00 ]
+Key: LXADD:  [ 0.00  0.00 ]
+Key: LZCNT:  [ 0.00  0.00 ]
+Key: MASKMOVDQU:  [ 0.00  0.00 ]
+Key: MASKPAIR:  [ 0.00  0.00 ]
+Key: MAXCPDrm:  [ 0.00  0.00 ]
+Key: MAXCPDrr:  [ 0.00  0.00 ]
+Key: MAXCPSrm:  [ 0.00  0.00 ]
+Key: MAXCPSrr:  [ 0.00  0.00 ]
+Key: MAXCSDrm:  [ 0.00  0.00 ]
+Key: MAXCSDrr:  [ 0.00  0.00 ]
+Key: MAXCSSrm:  [ 0.00  0.00 ]
+Key: MAXCSSrr:  [ 0.00  0.00 ]
+Key: MAXPDrm:  [ 0.00  0.00 ]
+Key: MAXPDrr:  [ 0.00  0.00 ]
+Key: MAXPSrm:  [ 0.00  0.00 ]
+Key: MAXPSrr:  [ 0.00  0.00 ]
+Key: MAXSDrm:  [ 0.00  0.00 ]
+Key: MAXSDrm_Int:  [ 0.00  0.00 ]
+Key: MAXSDrr:  [ 0.00  0.00 ]
+Key: MAXSDrr_Int:  [ 0.00  0.00 ]
+Key: MAXSSrm:  [ 0.00  0.00 ]
+Key: MAXSSrm_Int:  [ 0.00  0.00 ]
+Key: MAXSSrr:  [ 0.00  0.00 ]
+Key: MAXSSrr_Int:  [ 0.00  0.00 ]
+Key: MEMBARRIER:  [ 0.00  0.00 ]
+Key: MFENCE:  [ 0.00  0.00 ]
+Key: MINCPDrm:  [ 0.00  0.00 ]
+Key: MINCPDrr:  [ 0.00  0.00 ]
+Key: MINCPSrm:  [ 0.00  0.00 ]
+Key: MINCPSrr:  [ 0.00  0.00 ]
+Key: MINCSDrm:  [ 0.00  0.00 ]
+Key: MINCSDrr:  [ 0.00  0.00 ]
+Key: MINCSSrm:  [ 0.00  0.00 ]
+Key: MINCSSrr:  [ 0.00  0.00 ]
+Key: MINPDrm:  [ 0.00  0.00 ]
+Key: MINPDrr:  [ 0.00  0.00 ]
+Key: MINPSrm:  [ 0.00  0.00 ]
+Key: MINPSrr:  [ 0.00  0.00 ]
+Key: MINSDrm:  [ 0.00  0.00 ]
+Key: MINSDrm_Int:  [ 0.00  0.00 ]
+Key: MINSDrr:  [ 0.00  0.00 ]
+Key: MINSDrr_Int:  [ 0.00  0.00 ]
+Key: MINSSrm:  [ 0.00  0.00 ]
+Key: MINSSrm_Int:  [ 0.00  0.00 ]
+Key: MINSSrr:  [ 0.00  0.00 ]
+Key: MINSSrr_Int:  [ 0.00  0.00 ]
+Key: MMX_CVTPD:  [ 0.00  0.00 ]
+Key: MMX_CVTPI:  [ 0.00  0.00 ]
+Key: MMX_CVTPS:  [ 0.00  0.00 ]
+Key: MMX_CVTTPD:  [ 0.00  0.00 ]
+Key: MMX_CVTTPS:  [ 0.00  0.00 ]
+Key: MMX_EMMS:  [ 0.00  0.00 ]
+Key: MMX_MASKMOVQ:  [ 0.00  0.00 ]
+Key: MMX_MOVD:  [ 0.00  0.00 ]
+Key: MMX_MOVDQ:  [ 0.00  0.00 ]
+Key: MMX_MOVFR:  [ 0.00  0.00 ]
+Key: MMX_MOVNTQmr:  [ 0.00  0.00 ]
+Key: MMX_MOVQ:  [ 0.00  0.00 ]
+Key: MMX_PABSBrm:  [ 0.00  0.00 ]
+Key: MMX_PABSBrr:  [ 0.00  0.00 ]
+Key: MMX_PABSDrm:  [ 0.00  0.00 ]
+Key: MMX_PABSDrr:  [ 0.00  0.00 ]
+Key: MMX_PABSWrm:  [ 0.00  0.00 ]
+Key: MMX_PABSWrr:  [ 0.00  0.00 ]
+Key: MMX_PACKSSDWrm:  [ 0.00  0.00 ]
+Key: MMX_PACKSSDWrr:  [ 0.00  0.00 ]
+Key: MMX_PACKSSWBrm:  [ 0.00  0.00 ]
+Key: MMX_PACKSSWBrr:  [ 0.00  0.00 ]
+Key: MMX_PACKUSWBrm:  [ 0.00  0.00 ]
+Key: MMX_PACKUSWBrr:  [ 0.00  0.00 ]
+Key: MMX_PADDBrm:  [ 0.00  0.00 ]
+Key: MMX_PADDBrr:  [ 0.00  0.00 ]
+Key: MMX_PADDDrm:  [ 0.00  0.00 ]
+Key: MMX_PADDDrr:  [ 0.00  0.00 ]
+Key: MMX_PADDQrm:  [ 0.00  0.00 ]
+Key: MMX_PADDQrr:  [ 0.00  0.00 ]
+Key: MMX_PADDSBrm:  [ 0.00  0.00 ]
+Key: MMX_PADDSBrr:  [ 0.00  0.00 ]
+Key: MMX_PADDSWrm:  [ 0.00  0.00 ]
+Key: MMX_PADDSWrr:  [ 0.00  0.00 ]
+Key: MMX_PADDUSBrm:  [ 0.00  0.00 ]
+Key: MMX_PADDUSBrr:  [ 0.00  0.00 ]
+Key: MMX_PADDUSWrm:  [ 0.00  0.00 ]
+Key: MMX_PADDUSWrr:  [ 0.00  0.00 ]
+Key: MMX_PADDWrm:  [ 0.00  0.00 ]
+Key: MMX_PADDWrr:  [ 0.00  0.00 ]
+Key: MMX_PALIGNRrmi:  [ 0.00  0.00 ]
+Key: MMX_PALIGNRrri:  [ 0.00  0.00 ]
+Key: MMX_PANDNrm:  [ 0.00  0.00 ]
+Key: MMX_PANDNrr:  [ 0.00  0.00 ]
+Key: MMX_PANDrm:  [ 0.00  0.00 ]
+Key: MMX_PANDrr:  [ 0.00  0.00 ]
+Key: MMX_PAVGBrm:  [ 0.00  0.00 ]
+Key: MMX_PAVGBrr:  [ 0.00  0.00 ]
+Key: MMX_PAVGWrm:  [ 0.00  0.00 ]
+Key: MMX_PAVGWrr:  [ 0.00  0.00 ]
+Key: MMX_PCMPEQBrm:  [ 0.00  0.00 ]
+Key: MMX_PCMPEQBrr:  [ 0.00  0.00 ]
+Key: MMX_PCMPEQDrm:  [ 0.00  0.00 ]
+Key: MMX_PCMPEQDrr:  [ 0.00  0.00 ]
+Key: MMX_PCMPEQWrm:  [ 0.00  0.00 ]
+Key: MMX_PCMPEQWrr:  [ 0.00  0.00 ]
+Key: MMX_PCMPGTBrm:  [ 0.00  0.00 ]
+Key: MMX_PCMPGTBrr:  [ 0.00  0.00 ]
+Key: MMX_PCMPGTDrm:  [ 0.00  0.00 ]
+Key: MMX_PCMPGTDrr:  [ 0.00  0.00 ]
+Key: MMX_PCMPGTWrm:  [ 0.00  0.00 ]
+Key: MMX_PCMPGTWrr:  [ 0.00  0.00 ]
+Key: MMX_PEXTRWrri:  [ 0.00  0.00 ]
+Key: MMX_PHADDDrm:  [ 0.00  0.00 ]
+Key: MMX_PHADDDrr:  [ 0.00  0.00 ]
+Key: MMX_PHADDSWrm:  [ 0.00  0.00 ]
+Key: MMX_PHADDSWrr:  [ 0.00  0.00 ]
+Key: MMX_PHADDWrm:  [ 0.00  0.00 ]
+Key: MMX_PHADDWrr:  [ 0.00  0.00 ]
+Key: MMX_PHSUBDrm:  [ 0.00  0.00 ]
+Key: MMX_PHSUBDrr:  [ 0.00  0.00 ]
+Key: MMX_PHSUBSWrm:  [ 0.00  0.00 ]
+Key: MMX_PHSUBSWrr:  [ 0.00  0.00 ]
+Key: MMX_PHSUBWrm:  [ 0.00  0.00 ]
+Key: MMX_PHSUBWrr:  [ 0.00  0.00 ]
+Key: MMX_PINSRWrmi:  [ 0.00  0.00 ]
+Key: MMX_PINSRWrri:  [ 0.00  0.00 ]
+Key: MMX_PMADDUBSWrm:  [ 0.00  0.00 ]
+Key: MMX_PMADDUBSWrr:  [ 0.00  0.00 ]
+Key: MMX_PMADDWDrm:  [ 0.00  0.00 ]
+Key: MMX_PMADDWDrr:  [ 0.00  0.00 ]
+Key: MMX_PMAXSWrm:  [ 0.00  0.00 ]
+Key: MMX_PMAXSWrr:  [ 0.00  0.00 ]
+Key: MMX_PMAXUBrm:  [ 0.00  0.00 ]
+Key: MMX_PMAXUBrr:  [ 0.00  0.00 ]
+Key: MMX_PMINSWrm:  [ 0.00  0.00 ]
+Key: MMX_PMINSWrr:  [ 0.00  0.00 ]
+Key: MMX_PMINUBrm:  [ 0.00  0.00 ]
+Key: MMX_PMINUBrr:  [ 0.00  0.00 ]
+Key: MMX_PMOVMSKBrr:  [ 0.00  0.00 ]
+Key: MMX_PMULHRSWrm:  [ 0.00  0.00 ]
+Key: MMX_PMULHRSWrr:  [ 0.00  0.00 ]
+Key: MMX_PMULHUWrm:  [ 0.00  0.00 ]
+Key: MMX_PMULHUWrr:  [ 0.00  0.00 ]
+Key: MMX_PMULHWrm:  [ 0.00  0.00 ]
+Key: MMX_PMULHWrr:  [ 0.00  0.00 ]
+Key: MMX_PMULLWrm:  [ 0.00  0.00 ]
+Key: MMX_PMULLWrr:  [ 0.00  0.00 ]
+Key: MMX_PMULUDQrm:  [ 0.00  0.00 ]
+Key: MMX_PMULUDQrr:  [ 0.00  0.00 ]
+Key: MMX_PORrm:  [ 0.00  0.00 ]
+Key: MMX_PORrr:  [ 0.00  0.00 ]
+Key: MMX_PSADBWrm:  [ 0.00  0.00 ]
+Key: MMX_PSADBWrr:  [ 0.00  0.00 ]
+Key: MMX_PSHUFBrm:  [ 0.00  0.00 ]
+Key: MMX_PSHUFBrr:  [ 0.00  0.00 ]
+Key: MMX_PSHUFWmi:  [ 0.00  0.00 ]
+Key: MMX_PSHUFWri:  [ 0.00  0.00 ]
+Key: MMX_PSIGNBrm:  [ 0.00  0.00 ]
+Key: MMX_PSIGNBrr:  [ 0.00  0.00 ]
+Key: MMX_PSIGNDrm:  [ 0.00  0.00 ]
+Key: MMX_PSIGNDrr:  [ 0.00  0.00 ]
+Key: MMX_PSIGNWrm:  [ 0.00  0.00 ]
+Key: MMX_PSIGNWrr:  [ 0.00  0.00 ]
+Key: MMX_PSLLDri:  [ 0.00  0.00 ]
+Key: MMX_PSLLDrm:  [ 0.00  0.00 ]
+Key: MMX_PSLLDrr:  [ 0.00  0.00 ]
+Key: MMX_PSLLQri:  [ 0.00  0.00 ]
+Key: MMX_PSLLQrm:  [ 0.00  0.00 ]
+Key: MMX_PSLLQrr:  [ 0.00  0.00 ]
+Key: MMX_PSLLWri:  [ 0.00  0.00 ]
+Key: MMX_PSLLWrm:  [ 0.00  0.00 ]
+Key: MMX_PSLLWrr:  [ 0.00  0.00 ]
+Key: MMX_PSRADri:  [ 0.00  0.00 ]
+Key: MMX_PSRADrm:  [ 0.00  0.00 ]
+Key: MMX_PSRADrr:  [ 0.00  0.00 ]
+Key: MMX_PSRAWri:  [ 0.00  0.00 ]
+Key: MMX_PSRAWrm:  [ 0.00  0.00 ]
+Key: MMX_PSRAWrr:  [ 0.00  0.00 ]
+Key: MMX_PSRLDri:  [ 0.00  0.00 ]
+Key: MMX_PSRLDrm:  [ 0.00  0.00 ]
+Key: MMX_PSRLDrr:  [ 0.00  0.00 ]
+Key: MMX_PSRLQri:  [ 0.00  0.00 ]
+Key: MMX_PSRLQrm:  [ 0.00  0.00 ]
+Key: MMX_PSRLQrr:  [ 0.00  0.00 ]
+Key: MMX_PSRLWri:  [ 0.00  0.00 ]
+Key: MMX_PSRLWrm:  [ 0.00  0.00 ]
+Key: MMX_PSRLWrr:  [ 0.00  0.00 ]
+Key: MMX_PSUBBrm:  [ 0.00  0.00 ]
+Key: MMX_PSUBBrr:  [ 0.00  0.00 ]
+Key: MMX_PSUBDrm:  [ 0.00  0.00 ]
+Key: MMX_PSUBDrr:  [ 0.00  0.00 ]
+Key: MMX_PSUBQrm:  [ 0.00  0.00 ]
+Key: MMX_PSUBQrr:  [ 0.00  0.00 ]
+Key: MMX_PSUBSBrm:  [ 0.00  0.00 ]
+Key: MMX_PSUBSBrr:  [ 0.00  0.00 ]
+Key: MMX_PSUBSWrm:  [ 0.00  0.00 ]
+Key: MMX_PSUBSWrr:  [ 0.00  0.00 ]
+Key: MMX_PSUBUSBrm:  [ 0.00  0.00 ]
+Key: MMX_PSUBUSBrr:  [ 0.00  0.00 ]
+Key: MMX_PSUBUSWrm:  [ 0.00  0.00 ]
+Key: MMX_PSUBUSWrr:  [ 0.00  0.00 ]
+Key: MMX_PSUBWrm:  [ 0.00  0.00 ]
+Key: MMX_PSUBWrr:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKHBWrm:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKHBWrr:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKHDQrm:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKHDQrr:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKHWDrm:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKHWDrr:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKLBWrm:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKLBWrr:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKLDQrm:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKLDQrr:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKLWDrm:  [ 0.00  0.00 ]
+Key: MMX_PUNPCKLWDrr:  [ 0.00  0.00 ]
+Key: MMX_PXORrm:  [ 0.00  0.00 ]
+Key: MMX_PXORrr:  [ 0.00  0.00 ]
+Key: MMX_SET:  [ 0.00  0.00 ]
+Key: MONITOR:  [ 0.00  0.00 ]
+Key: MONITORX:  [ 0.00  0.00 ]
+Key: MONTMUL:  [ 0.00  0.00 ]
+Key: MORESTACK_RET:  [ 0.00  0.00 ]
+Key: MORESTACK_RET_RESTORE_R:  [ 0.00  0.00 ]
+Key: MOV:  [ 0.00  0.00 ]
+Key: MOVAPDmr:  [ 0.00  0.00 ]
+Key: MOVAPDrm:  [ 0.00  0.00 ]
+Key: MOVAPDrr:  [ 0.00  0.00 ]
+Key: MOVAPDrr_REV:  [ 0.00  0.00 ]
+Key: MOVAPSmr:  [ 0.00  0.00 ]
+Key: MOVAPSrm:  [ 0.00  0.00 ]
+Key: MOVAPSrr:  [ 0.00  0.00 ]
+Key: MOVAPSrr_REV:  [ 0.00  0.00 ]
+Key: MOVBE:  [ 0.00  0.00 ]
+Key: MOVDDUPrm:  [ 0.00  0.00 ]
+Key: MOVDDUPrr:  [ 0.00  0.00 ]
+Key: MOVDI:  [ 0.00  0.00 ]
+Key: MOVDIR:  [ 0.00  0.00 ]
+Key: MOVDIRI:  [ 0.00  0.00 ]
+Key: MOVDQAmr:  [ 0.00  0.00 ]
+Key: MOVDQArm:  [ 0.00  0.00 ]
+Key: MOVDQArr:  [ 0.00  0.00 ]
+Key: MOVDQArr_REV:  [ 0.00  0.00 ]
+Key: MOVDQUmr:  [ 0.00  0.00 ]
+Key: MOVDQUrm:  [ 0.00  0.00 ]
+Key: MOVDQUrr:  [ 0.00  0.00 ]
+Key: MOVDQUrr_REV:  [ 0.00  0.00 ]
+Key: MOVHLPSrr:  [ 0.00  0.00 ]
+Key: MOVHPDmr:  [ 0.00  0.00 ]
+Key: MOVHPDrm:  [ 0.00  0.00 ]
+Key: MOVHPSmr:  [ 0.00  0.00 ]
+Key: MOVHPSrm:  [ 0.00  0.00 ]
+Key: MOVLHPSrr:  [ 0.00  0.00 ]
+Key: MOVLPDmr:  [ 0.00  0.00 ]
+Key: MOVLPDrm:  [ 0.00  0.00 ]
+Key: MOVLPSmr:  [ 0.00  0.00 ]
+Key: MOVLPSrm:  [ 0.00  0.00 ]
+Key: MOVMSKPDrr:  [ 0.00  0.00 ]
+Key: MOVMSKPSrr:  [ 0.00  0.00 ]
+Key: MOVNTDQArm:  [ 0.00  0.00 ]
+Key: MOVNTDQmr:  [ 0.00  0.00 ]
+Key: MOVNTI:  [ 0.00  0.00 ]
+Key: MOVNTImr:  [ 0.00  0.00 ]
+Key: MOVNTPDmr:  [ 0.00  0.00 ]
+Key: MOVNTPSmr:  [ 0.00  0.00 ]
+Key: MOVNTSD:  [ 0.00  0.00 ]
+Key: MOVNTSS:  [ 0.00  0.00 ]
+Key: MOVPC:  [ 0.00  0.00 ]
+Key: MOVPDI:  [ 0.00  0.00 ]
+Key: MOVPQI:  [ 0.00  0.00 ]
+Key: MOVPQIto:  [ 0.00  0.00 ]
+Key: MOVQI:  [ 0.00  0.00 ]
+Key: MOVRS:  [ 0.00  0.00 ]
+Key: MOVSB:  [ 0.00  0.00 ]
+Key: MOVSDmr:  [ 0.00  0.00 ]
+Key: MOVSDrm:  [ 0.00  0.00 ]
+Key: MOVSDrm_alt:  [ 0.00  0.00 ]
+Key: MOVSDrr:  [ 0.00  0.00 ]
+Key: MOVSDrr_REV:  [ 0.00  0.00 ]
+Key: MOVSDto:  [ 0.00  0.00 ]
+Key: MOVSHDUPrm:  [ 0.00  0.00 ]
+Key: MOVSHDUPrr:  [ 0.00  0.00 ]
+Key: MOVSHPmr:  [ 0.00  0.00 ]
+Key: MOVSHPrm:  [ 0.00  0.00 ]
+Key: MOVSL:  [ 0.00  0.00 ]
+Key: MOVSLDUPrm:  [ 0.00  0.00 ]
+Key: MOVSLDUPrr:  [ 0.00  0.00 ]
+Key: MOVSQ:  [ 0.00  0.00 ]
+Key: MOVSS:  [ 0.00  0.00 ]
+Key: MOVSSmr:  [ 0.00  0.00 ]
+Key: MOVSSrm:  [ 0.00  0.00 ]
+Key: MOVSSrm_alt:  [ 0.00  0.00 ]
+Key: MOVSSrr:  [ 0.00  0.00 ]
+Key: MOVSSrr_REV:  [ 0.00  0.00 ]
+Key: MOVSW:  [ 0.00  0.00 ]
+Key: MOVSX:  [ 0.00  0.00 ]
+Key: MOVUPDmr:  [ 0.00  0.00 ]
+Key: MOVUPDrm:  [ 0.00  0.00 ]
+Key: MOVUPDrr:  [ 0.00  0.00 ]
+Key: MOVUPDrr_REV:  [ 0.00  0.00 ]
+Key: MOVUPSmr:  [ 0.00  0.00 ]
+Key: MOVUPSrm:  [ 0.00  0.00 ]
+Key: MOVUPSrr:  [ 0.00  0.00 ]
+Key: MOVUPSrr_REV:  [ 0.00  0.00 ]
+Key: MOVZPQILo:  [ 0.00  0.00 ]
+Key: MOVZX:  [ 0.00  0.00 ]
+Key: MPSADBWrmi:  [ 0.00  0.00 ]
+Key: MPSADBWrri:  [ 0.00  0.00 ]
+Key: MUL:  [ 0.00  0.00 ]
+Key: MULPDrm:  [ 0.00  0.00 ]
+Key: MULPDrr:  [ 0.00  0.00 ]
+Key: MULPSrm:  [ 0.00  0.00 ]
+Key: MULPSrr:  [ 0.00  0.00 ]
+Key: MULSDrm:  [ 0.00  0.00 ]
+Key: MULSDrm_Int:  [ 0.00  0.00 ]
+Key: MULSDrr:  [ 0.00  0.00 ]
+Key: MULSDrr_Int:  [ 0.00  0.00 ]
+Key: MULSSrm:  [ 0.00  0.00 ]
+Key: MULSSrm_Int:  [ 0.00  0.00 ]
+Key: MULSSrr:  [ 0.00  0.00 ]
+Key: MULSSrr_Int:  [ 0.00  0.00 ]
+Key: MULX:  [ 0.00  0.00 ]
+Key: MUL_F:  [ 0.00  0.00 ]
+Key: MUL_FI:  [ 0.00  0.00 ]
+Key: MUL_FPrST:  [ 0.00  0.00 ]
+Key: MUL_FST:  [ 0.00  0.00 ]
+Key: MUL_Fp:  [ 0.00  0.00 ]
+Key: MUL_FpI:  [ 0.00  0.00 ]
+Key: MUL_FrST:  [ 0.00  0.00 ]
+Key: MWAITX:  [ 0.00  0.00 ]
+Key: MWAITX_SAVE_RBX:  [ 0.00  0.00 ]
+Key: MWAITXrrr:  [ 0.00  0.00 ]
+Key: MWAITrr:  [ 0.00  0.00 ]
+Key: NEG:  [ 0.00  0.00 ]
+Key: NOOP:  [ 0.00  0.00 ]
+Key: NOOPL:  [ 0.00  0.00 ]
+Key: NOOPLr:  [ 0.00  0.00 ]
+Key: NOOPQ:  [ 0.00  0.00 ]
+Key: NOOPQr:  [ 0.00  0.00 ]
+Key: NOOPW:  [ 0.00  0.00 ]
+Key: NOOPWr:  [ 0.00  0.00 ]
+Key: NOT:  [ 0.00  0.00 ]
+Key: OR:  [ 0.00  0.00 ]
+Key: ORPDrm:  [ 0.00  0.00 ]
+Key: ORPDrr:  [ 0.00  0.00 ]
+Key: ORPSrm:  [ 0.00  0.00 ]
+Key: ORPSrr:  [ 0.00  0.00 ]
+Key: OUT:  [ 0.00  0.00 ]
+Key: OUTSB:  [ 0.00  0.00 ]
+Key: OUTSL:  [ 0.00  0.00 ]
+Key: OUTSW:  [ 0.00  0.00 ]
+Key: PABSBrm:  [ 0.00  0.00 ]
+Key: PABSBrr:  [ 0.00  0.00 ]
+Key: PABSDrm:  [ 0.00  0.00 ]
+Key: PABSDrr:  [ 0.00  0.00 ]
+Key: PABSWrm:  [ 0.00  0.00 ]
+Key: PABSWrr:  [ 0.00  0.00 ]
+Key: PACKSSDWrm:  [ 0.00  0.00 ]
+Key: PACKSSDWrr:  [ 0.00  0.00 ]
+Key: PACKSSWBrm:  [ 0.00  0.00 ]
+Key: PACKSSWBrr:  [ 0.00  0.00 ]
+Key: PACKUSDWrm:  [ 0.00  0.00 ]
+Key: PACKUSDWrr:  [ 0.00  0.00 ]
+Key: PACKUSWBrm:  [ 0.00  0.00 ]
+Key: PACKUSWBrr:  [ 0.00  0.00 ]
+Key: PADDBrm:  [ 0.00  0.00 ]
+Key: PADDBrr:  [ 0.00  0.00 ]
+Key: PADDDrm:  [ 0.00  0.00 ]
+Key: PADDDrr:  [ 0.00  0.00 ]
+Key: PADDQrm:  [ 0.00  0.00 ]
+Key: PADDQrr:  [ 0.00  0.00 ]
+Key: PADDSBrm:  [ 0.00  0.00 ]
+Key: PADDSBrr:  [ 0.00  0.00 ]
+Key: PADDSWrm:  [ 0.00  0.00 ]
+Key: PADDSWrr:  [ 0.00  0.00 ]
+Key: PADDUSBrm:  [ 0.00  0.00 ]
+Key: PADDUSBrr:  [ 0.00  0.00 ]
+Key: PADDUSWrm:  [ 0.00  0.00 ]
+Key: PADDUSWrr:  [ 0.00  0.00 ]
+Key: PADDWrm:  [ 0.00  0.00 ]
+Key: PADDWrr:  [ 0.00  0.00 ]
+Key: PALIGNRrmi:  [ 0.00  0.00 ]
+Key: PALIGNRrri:  [ 0.00  0.00 ]
+Key: PANDNrm:  [ 0.00  0.00 ]
+Key: PANDNrr:  [ 0.00  0.00 ]
+Key: PANDrm:  [ 0.00  0.00 ]
+Key: PANDrr:  [ 0.00  0.00 ]
+Key: PATCHABLE_EVENT_CALL:  [ 0.00  0.00 ]
+Key: PATCHABLE_FUNCTION_ENTER:  [ 0.00  0.00 ]
+Key: PATCHABLE_FUNCTION_EXIT:  [ 0.00  0.00 ]
+Key: PATCHABLE_OP:  [ 0.00  0.00 ]
+Key: PATCHABLE_RET:  [ 0.00  0.00 ]
+Key: PATCHABLE_TAIL_CALL:  [ 0.00  0.00 ]
+Key: PATCHABLE_TYPED_EVENT_CALL:  [ 0.00  0.00 ]
+Key: PATCHPOINT:  [ 0.00  0.00 ]
+Key: PAUSE:  [ 0.00  0.00 ]
+Key: PAVGBrm:  [ 0.00  0.00 ]
+Key: PAVGBrr:  [ 0.00  0.00 ]
+Key: PAVGUSBrm:  [ 0.00  0.00 ]
+Key: PAVGUSBrr:  [ 0.00  0.00 ]
+Key: PAVGWrm:  [ 0.00  0.00 ]
+Key: PAVGWrr:  [ 0.00  0.00 ]
+Key: PBLENDVBrm:  [ 0.00  0.00 ]
+Key: PBLENDVBrr:  [ 0.00  0.00 ]
+Key: PBLENDWrmi:  [ 0.00  0.00 ]
+Key: PBLENDWrri:  [ 0.00  0.00 ]
+Key: PBNDKB:  [ 0.00  0.00 ]
+Key: PCLMULQDQrmi:  [ 0.00  0.00 ]
+Key: PCLMULQDQrri:  [ 0.00  0.00 ]
+Key: PCMPEQBrm:  [ 0.00  0.00 ]
+Key: PCMPEQBrr:  [ 0.00  0.00 ]
+Key: PCMPEQDrm:  [ 0.00  0.00 ]
+Key: PCMPEQDrr:  [ 0.00  0.00 ]
+Key: PCMPEQQrm:  [ 0.00  0.00 ]
+Key: PCMPEQQrr:  [ 0.00  0.00 ]
+Key: PCMPEQWrm:  [ 0.00  0.00 ]
+Key: PCMPEQWrr:  [ 0.00  0.00 ]
+Key: PCMPESTRIrmi:  [ 0.00  0.00 ]
+Key: PCMPESTRIrri:  [ 0.00  0.00 ]
+Key: PCMPESTRMrmi:  [ 0.00  0.00 ]
+Key: PCMPESTRMrri:  [ 0.00  0.00 ]
+Key: PCMPGTBrm:  [ 0.00  0.00 ]
+Key: PCMPGTBrr:  [ 0.00  0.00 ]
+Key: PCMPGTDrm:  [ 0.00  0.00 ]
+Key: PCMPGTDrr:  [ 0.00  0.00 ]
+Key: PCMPGTQrm:  [ 0.00  0.00 ]
+Key: PCMPGTQrr:  [ 0.00  0.00 ]
+Key: PCMPGTWrm:  [ 0.00  0.00 ]
+Key: PCMPGTWrr:  [ 0.00  0.00 ]
+Key: PCMPISTRIrmi:  [ 0.00  0.00 ]
+Key: PCMPISTRIrri:  [ 0.00  0.00 ]
+Key: PCMPISTRMrmi:  [ 0.00  0.00 ]
+Key: PCMPISTRMrri:  [ 0.00  0.00 ]
+Key: PCONFIG:  [ 0.00  0.00 ]
+Key: PDEP:  [ 0.00  0.00 ]
+Key: PEXT:  [ 0.00  0.00 ]
+Key: PEXTRBmri:  [ 0.00  0.00 ]
+Key: PEXTRBrri:  [ 0.00  0.00 ]
+Key: PEXTRDmri:  [ 0.00  0.00 ]
+Key: PEXTRDrri:  [ 0.00  0.00 ]
+Key: PEXTRQmri:  [ 0.00  0.00 ]
+Key: PEXTRQrri:  [ 0.00  0.00 ]
+Key: PEXTRWmri:  [ 0.00  0.00 ]
+Key: PEXTRWrri:  [ 0.00  0.00 ]
+Key: PEXTRWrri_REV:  [ 0.00  0.00 ]
+Key: PF:  [ 0.00  0.00 ]
+Key: PFACCrm:  [ 0.00  0.00 ]
+Key: PFACCrr:  [ 0.00  0.00 ]
+Key: PFADDrm:  [ 0.00  0.00 ]
+Key: PFADDrr:  [ 0.00  0.00 ]
+Key: PFCMPEQrm:  [ 0.00  0.00 ]
+Key: PFCMPEQrr:  [ 0.00  0.00 ]
+Key: PFCMPGErm:  [ 0.00  0.00 ]
+Key: PFCMPGErr:  [ 0.00  0.00 ]
+Key: PFCMPGTrm:  [ 0.00  0.00 ]
+Key: PFCMPGTrr:  [ 0.00  0.00 ]
+Key: PFMAXrm:  [ 0.00  0.00 ]
+Key: PFMAXrr:  [ 0.00  0.00 ]
+Key: PFMINrm:  [ 0.00  0.00 ]
+Key: PFMINrr:  [ 0.00  0.00 ]
+Key: PFMULrm:  [ 0.00  0.00 ]
+Key: PFMULrr:  [ 0.00  0.00 ]
+Key: PFNACCrm:  [ 0.00  0.00 ]
+Key: PFNACCrr:  [ 0.00  0.00 ]
+Key: PFPNACCrm:  [ 0.00  0.00 ]
+Key: PFPNACCrr:  [ 0.00  0.00 ]
+Key: PFRCPIT:  [ 0.00  0.00 ]
+Key: PFRCPrm:  [ 0.00  0.00 ]
+Key: PFRCPrr:  [ 0.00  0.00 ]
+Key: PFRSQIT:  [ 0.00  0.00 ]
+Key: PFRSQRTrm:  [ 0.00  0.00 ]
+Key: PFRSQRTrr:  [ 0.00  0.00 ]
+Key: PFSUBRrm:  [ 0.00  0.00 ]
+Key: PFSUBRrr:  [ 0.00  0.00 ]
+Key: PFSUBrm:  [ 0.00  0.00 ]
+Key: PFSUBrr:  [ 0.00  0.00 ]
+Key: PHADDDrm:  [ 0.00  0.00 ]
+Key: PHADDDrr:  [ 0.00  0.00 ]
+Key: PHADDSWrm:  [ 0.00  0.00 ]
+Key: PHADDSWrr:  [ 0.00  0.00 ]
+Key: PHADDWrm:  [ 0.00  0.00 ]
+Key: PHADDWrr:  [ 0.00  0.00 ]
+Key: PHI:  [ 0.00  0.00 ]
+Key: PHMINPOSUWrm:  [ 0.00  0.00 ]
+Key: PHMINPOSUWrr:  [ 0.00  0.00 ]
+Key: PHSUBDrm:  [ 0.00  0.00 ]
+Key: PHSUBDrr:  [ 0.00  0.00 ]
+Key: PHSUBSWrm:  [ 0.00  0.00 ]
+Key: PHSUBSWrr:  [ 0.00  0.00 ]
+Key: PHSUBWrm:  [ 0.00  0.00 ]
+Key: PHSUBWrr:  [ 0.00  0.00 ]
+Key: PI:  [ 0.00  0.00 ]
+Key: PINSRBrmi:  [ 0.00  0.00 ]
+Key: PINSRBrri:  [ 0.00  0.00 ]
+Key: PINSRDrmi:  [ 0.00  0.00 ]
+Key: PINSRDrri:  [ 0.00  0.00 ]
+Key: PINSRQrmi:  [ 0.00  0.00 ]
+Key: PINSRQrri:  [ 0.00  0.00 ]
+Key: PINSRWrmi:  [ 0.00  0.00 ]
+Key: PINSRWrri:  [ 0.00  0.00 ]
+Key: PLDTILECFGV:  [ 0.00  0.00 ]
+Key: PLEA:  [ 0.00  0.00 ]
+Key: PMADDUBSWrm:  [ 0.00  0.00 ]
+Key: PMADDUBSWrr:  [ 0.00  0.00 ]
+Key: PMADDWDrm:  [ 0.00  0.00 ]
+Key: PMADDWDrr:  [ 0.00  0.00 ]
+Key: PMAXSBrm:  [ 0.00  0.00 ]
+Key: PMAXSBrr:  [ 0.00  0.00 ]
+Key: PMAXSDrm:  [ 0.00  0.00 ]
+Key: PMAXSDrr:  [ 0.00  0.00 ]
+Key: PMAXSWrm:  [ 0.00  0.00 ]
+Key: PMAXSWrr:  [ 0.00  0.00 ]
+Key: PMAXUBrm:  [ 0.00  0.00 ]
+Key: PMAXUBrr:  [ 0.00  0.00 ]
+Key: PMAXUDrm:  [ 0.00  0.00 ]
+Key: PMAXUDrr:  [ 0.00  0.00 ]
+Key: PMAXUWrm:  [ 0.00  0.00 ]
+Key: PMAXUWrr:  [ 0.00  0.00 ]
+Key: PMINSBrm:  [ 0.00  0.00 ]
+Key: PMINSBrr:  [ 0.00  0.00 ]
+Key: PMINSDrm:  [ 0.00  0.00 ]
+Key: PMINSDrr:  [ 0.00  0.00 ]
+Key: PMINSWrm:  [ 0.00  0.00 ]
+Key: PMINSWrr:  [ 0.00  0.00 ]
+Key: PMINUBrm:  [ 0.00  0.00 ]
+Key: PMINUBrr:  [ 0.00  0.00 ]
+Key: PMINUDrm:  [ 0.00  0.00 ]
+Key: PMINUDrr:  [ 0.00  0.00 ]
+Key: PMINUWrm:  [ 0.00  0.00 ]
+Key: PMINUWrr:  [ 0.00  0.00 ]
+Key: PMOVMSKBrr:  [ 0.00  0.00 ]
+Key: PMOVSXBDrm:  [ 0.00  0.00 ]
+Key: PMOVSXBDrr:  [ 0.00  0.00 ]
+Key: PMOVSXBQrm:  [ 0.00  0.00 ]
+Key: PMOVSXBQrr:  [ 0.00  0.00 ]
+Key: PMOVSXBWrm:  [ 0.00  0.00 ]
+Key: PMOVSXBWrr:  [ 0.00  0.00 ]
+Key: PMOVSXDQrm:  [ 0.00  0.00 ]
+Key: PMOVSXDQrr:  [ 0.00  0.00 ]
+Key: PMOVSXWDrm:  [ 0.00  0.00 ]
+Key: PMOVSXWDrr:  [ 0.00  0.00 ]
+Key: PMOVSXWQrm:  [ 0.00  0.00 ]
+Key: PMOVSXWQrr:  [ 0.00  0.00 ]
+Key: PMOVZXBDrm:  [ 0.00  0.00 ]
+Key: PMOVZXBDrr:  [ 0.00  0.00 ]
+Key: PMOVZXBQrm:  [ 0.00  0.00 ]
+Key: PMOVZXBQrr:  [ 0.00  0.00 ]
+Key: PMOVZXBWrm:  [ 0.00  0.00 ]
+Key: PMOVZXBWrr:  [ 0.00  0.00 ]
+Key: PMOVZXDQrm:  [ 0.00  0.00 ]
+Key: PMOVZXDQrr:  [ 0.00  0.00 ]
+Key: PMOVZXWDrm:  [ 0.00  0.00 ]
+Key: PMOVZXWDrr:  [ 0.00  0.00 ]
+Key: PMOVZXWQrm:  [ 0.00  0.00 ]
+Key: PMOVZXWQrr:  [ 0.00  0.00 ]
+Key: PMULDQrm:  [ 0.00  0.00 ]
+Key: PMULDQrr:  [ 0.00  0.00 ]
+Key: PMULHRSWrm:  [ 0.00  0.00 ]
+Key: PMULHRSWrr:  [ 0.00  0.00 ]
+Key: PMULHRWrm:  [ 0.00  0.00 ]
+Key: PMULHRWrr:  [ 0.00  0.00 ]
+Key: PMULHUWrm:  [ 0.00  0.00 ]
+Key: PMULHUWrr:  [ 0.00  0.00 ]
+Key: PMULHWrm:  [ 0.00  0.00 ]
+Key: PMULHWrr:  [ 0.00  0.00 ]
+Key: PMULLDrm:  [ 0.00  0.00 ]
+Key: PMULLDrr:  [ 0.00  0.00 ]
+Key: PMULLWrm:  [ 0.00  0.00 ]
+Key: PMULLWrr:  [ 0.00  0.00 ]
+Key: PMULUDQrm:  [ 0.00  0.00 ]
+Key: PMULUDQrr:  [ 0.00  0.00 ]
+Key: POP:  [ 0.00  0.00 ]
+Key: POPA:  [ 0.00  0.00 ]
+Key: POPCNT:  [ 0.00  0.00 ]
+Key: POPDS:  [ 0.00  0.00 ]
+Key: POPES:  [ 0.00  0.00 ]
+Key: POPF:  [ 0.00  0.00 ]
+Key: POPFS:  [ 0.00  0.00 ]
+Key: POPGS:  [ 0.00  0.00 ]
+Key: POPP:  [ 0.00  0.00 ]
+Key: POPSS:  [ 0.00  0.00 ]
+Key: PORrm:  [ 0.00  0.00 ]
+Key: PORrr:  [ 0.00  0.00 ]
+Key: PREALLOCATED_ARG:  [ 0.00  0.00 ]
+Key: PREALLOCATED_SETUP:  [ 0.00  0.00 ]
+Key: PREFETCH:  [ 0.00  0.00 ]
+Key: PREFETCHIT:  [ 0.00  0.00 ]
+Key: PREFETCHNTA:  [ 0.00  0.00 ]
+Key: PREFETCHRST:  [ 0.00  0.00 ]
+Key: PREFETCHT:  [ 0.00  0.00 ]
+Key: PREFETCHW:  [ 0.00  0.00 ]
+Key: PREFETCHWT:  [ 0.00  0.00 ]
+Key: PROBED_ALLOCA:  [ 0.00  0.00 ]
+Key: PSADBWrm:  [ 0.00  0.00 ]
+Key: PSADBWrr:  [ 0.00  0.00 ]
+Key: PSEUDO_PROBE:  [ 0.00  0.00 ]
+Key: PSHUFBrm:  [ 0.00  0.00 ]
+Key: PSHUFBrr:  [ 0.00  0.00 ]
+Key: PSHUFDmi:  [ 0.00  0.00 ]
+Key: PSHUFDri:  [ 0.00  0.00 ]
+Key: PSHUFHWmi:  [ 0.00  0.00 ]
+Key: PSHUFHWri:  [ 0.00  0.00 ]
+Key: PSHUFLWmi:  [ 0.00  0.00 ]
+Key: PSHUFLWri:  [ 0.00  0.00 ]
+Key: PSIGNBrm:  [ 0.00  0.00 ]
+Key: PSIGNBrr:  [ 0.00  0.00 ]
+Key: PSIGNDrm:  [ 0.00  0.00 ]
+Key: PSIGNDrr:  [ 0.00  0.00 ]
+Key: PSIGNWrm:  [ 0.00  0.00 ]
+Key: PSIGNWrr:  [ 0.00  0.00 ]
+Key: PSLLDQri:  [ 0.00  0.00 ]
+Key: PSLLDri:  [ 0.00  0.00 ]
+Key: PSLLDrm:  [ 0.00  0.00 ]
+Key: PSLLDrr:  [ 0.00  0.00 ]
+Key: PSLLQri:  [ 0.00  0.00 ]
+Key: PSLLQrm:  [ 0.00  0.00 ]
+Key: PSLLQrr:  [ 0.00  0.00 ]
+Key: PSLLWri:  [ 0.00  0.00 ]
+Key: PSLLWrm:  [ 0.00  0.00 ]
+Key: PSLLWrr:  [ 0.00  0.00 ]
+Key: PSMASH:  [ 0.00  0.00 ]
+Key: PSRADri:  [ 0.00  0.00 ]
+Key: PSRADrm:  [ 0.00  0.00 ]
+Key: PSRADrr:  [ 0.00  0.00 ]
+Key: PSRAWri:  [ 0.00  0.00 ]
+Key: PSRAWrm:  [ 0.00  0.00 ]
+Key: PSRAWrr:  [ 0.00  0.00 ]
+Key: PSRLDQri:  [ 0.00  0.00 ]
+Key: PSRLDri:  [ 0.00  0.00 ]
+Key: PSRLDrm:  [ 0.00  0.00 ]
+Key: PSRLDrr:  [ 0.00  0.00 ]
+Key: PSRLQri:  [ 0.00  0.00 ]
+Key: PSRLQrm:  [ 0.00  0.00 ]
+Key: PSRLQrr:  [ 0.00  0.00 ]
+Key: PSRLWri:  [ 0.00  0.00 ]
+Key: PSRLWrm:  [ 0.00  0.00 ]
+Key: PSRLWrr:  [ 0.00  0.00 ]
+Key: PSUBBrm:  [ 0.00  0.00 ]
+Key: PSUBBrr:  [ 0.00  0.00 ]
+Key: PSUBDrm:  [ 0.00  0.00 ]
+Key: PSUBDrr:  [ 0.00  0.00 ]
+Key: PSUBQrm:  [ 0.00  0.00 ]
+Key: PSUBQrr:  [ 0.00  0.00 ]
+Key: PSUBSBrm:  [ 0.00  0.00 ]
+Key: PSUBSBrr:  [ 0.00  0.00 ]
+Key: PSUBSWrm:  [ 0.00  0.00 ]
+Key: PSUBSWrr:  [ 0.00  0.00 ]
+Key: PSUBUSBrm:  [ 0.00  0.00 ]
+Key: PSUBUSBrr:  [ 0.00  0.00 ]
+Key: PSUBUSWrm:  [ 0.00  0.00 ]
+Key: PSUBUSWrr:  [ 0.00  0.00 ]
+Key: PSUBWrm:  [ 0.00  0.00 ]
+Key: PSUBWrr:  [ 0.00  0.00 ]
+Key: PSWAPDrm:  [ 0.00  0.00 ]
+Key: PSWAPDrr:  [ 0.00  0.00 ]
+Key: PT:  [ 0.00  0.00 ]
+Key: PTCMMIMFP:  [ 0.00  0.00 ]
+Key: PTCMMRLFP:  [ 0.00  0.00 ]
+Key: PTCONJTCMMIMFP:  [ 0.00  0.00 ]
+Key: PTCONJTFP:  [ 0.00  0.00 ]
+Key: PTCVTROWD:  [ 0.00  0.00 ]
+Key: PTCVTROWPS:  [ 0.00  0.00 ]
+Key: PTDPBF:  [ 0.00  0.00 ]
+Key: PTDPBHF:  [ 0.00  0.00 ]
+Key: PTDPBSSD:  [ 0.00  0.00 ]
+Key: PTDPBSSDV:  [ 0.00  0.00 ]
+Key: PTDPBSUD:  [ 0.00  0.00 ]
+Key: PTDPBSUDV:  [ 0.00  0.00 ]
+Key: PTDPBUSD:  [ 0.00  0.00 ]
+Key: PTDPBUSDV:  [ 0.00  0.00 ]
+Key: PTDPBUUD:  [ 0.00  0.00 ]
+Key: PTDPBUUDV:  [ 0.00  0.00 ]
+Key: PTDPFP:  [ 0.00  0.00 ]
+Key: PTDPHBF:  [ 0.00  0.00 ]
+Key: PTDPHF:  [ 0.00  0.00 ]
+Key: PTESTrm:  [ 0.00  0.00 ]
+Key: PTESTrr:  [ 0.00  0.00 ]
+Key: PTILELOADD:  [ 0.00  0.00 ]
+Key: PTILELOADDRS:  [ 0.00  0.00 ]
+Key: PTILELOADDRST:  [ 0.00  0.00 ]
+Key: PTILELOADDRSV:  [ 0.00  0.00 ]
+Key: PTILELOADDT:  [ 0.00  0.00 ]
+Key: PTILELOADDV:  [ 0.00  0.00 ]
+Key: PTILEMOVROWrre:  [ 0.00  0.00 ]
+Key: PTILEMOVROWrreV:  [ 0.00  0.00 ]
+Key: PTILEMOVROWrri:  [ 0.00  0.00 ]
+Key: PTILEMOVROWrriV:  [ 0.00  0.00 ]
+Key: PTILEPAIRLOAD:  [ 0.00  0.00 ]
+Key: PTILEPAIRSTORE:  [ 0.00  0.00 ]
+Key: PTILESTORED:  [ 0.00  0.00 ]
+Key: PTILESTOREDV:  [ 0.00  0.00 ]
+Key: PTILEZERO:  [ 0.00  0.00 ]
+Key: PTILEZEROV:  [ 0.00  0.00 ]
+Key: PTMMULTF:  [ 0.00  0.00 ]
+Key: PTTCMMIMFP:  [ 0.00  0.00 ]
+Key: PTTCMMRLFP:  [ 0.00  0.00 ]
+Key: PTTDPBF:  [ 0.00  0.00 ]
+Key: PTTDPFP:  [ 0.00  0.00 ]
+Key: PTTMMULTF:  [ 0.00  0.00 ]
+Key: PTTRANSPOSED:  [ 0.00  0.00 ]
+Key: PTTRANSPOSEDV:  [ 0.00  0.00 ]
+Key: PTWRITE:  [ 0.00  0.00 ]
+Key: PTWRITEm:  [ 0.00  0.00 ]
+Key: PTWRITEr:  [ 0.00  0.00 ]
+Key: PUNPCKHBWrm:  [ 0.00  0.00 ]
+Key: PUNPCKHBWrr:  [ 0.00  0.00 ]
+Key: PUNPCKHDQrm:  [ 0.00  0.00 ]
+Key: PUNPCKHDQrr:  [ 0.00  0.00 ]
+Key: PUNPCKHQDQrm:  [ 0.00  0.00 ]
+Key: PUNPCKHQDQrr:  [ 0.00  0.00 ]
+Key: PUNPCKHWDrm:  [ 0.00  0.00 ]
+Key: PUNPCKHWDrr:  [ 0.00  0.00 ]
+Key: PUNPCKLBWrm:  [ 0.00  0.00 ]
+Key: PUNPCKLBWrr:  [ 0.00  0.00 ]
+Key: PUNPCKLDQrm:  [ 0.00  0.00 ]
+Key: PUNPCKLDQrr:  [ 0.00  0.00 ]
+Key: PUNPCKLQDQrm:  [ 0.00  0.00 ]
+Key: PUNPCKLQDQrr:  [ 0.00  0.00 ]
+Key: PUNPCKLWDrm:  [ 0.00  0.00 ]
+Key: PUNPCKLWDrr:  [ 0.00  0.00 ]
+Key: PUSH:  [ 0.00  0.00 ]
+Key: PUSHA:  [ 0.00  0.00 ]
+Key: PUSHCS:  [ 0.00  0.00 ]
+Key: PUSHDS:  [ 0.00  0.00 ]
+Key: PUSHES:  [ 0.00  0.00 ]
+Key: PUSHF:  [ 0.00  0.00 ]
+Key: PUSHFS:  [ 0.00  0.00 ]
+Key: PUSHGS:  [ 0.00  0.00 ]
+Key: PUSHP:  [ 0.00  0.00 ]
+Key: PUSHSS:  [ 0.00  0.00 ]
+Key: PVALIDATE:  [ 0.00  0.00 ]
+Key: PXORrm:  [ 0.00  0.00 ]
+Key: PXORrr:  [ 0.00  0.00 ]
+Key: RCL:  [ 0.00  0.00 ]
+Key: RCPPSm:  [ 0.00  0.00 ]
+Key: RCPPSr:  [ 0.00  0.00 ]
+Key: RCPSSm:  [ 0.00  0.00 ]
+Key: RCPSSm_Int:  [ 0.00  0.00 ]
+Key: RCPSSr:  [ 0.00  0.00 ]
+Key: RCPSSr_Int:  [ 0.00  0.00 ]
+Key: RCR:  [ 0.00  0.00 ]
+Key: RDFLAGS:  [ 0.00  0.00 ]
+Key: RDFSBASE:  [ 0.00  0.00 ]
+Key: RDGSBASE:  [ 0.00  0.00 ]
+Key: RDMSR:  [ 0.00  0.00 ]
+Key: RDMSRLIST:  [ 0.00  0.00 ]
+Key: RDMSRri:  [ 0.00  0.00 ]
+Key: RDMSRri_EVEX:  [ 0.00  0.00 ]
+Key: RDPID:  [ 0.00  0.00 ]
+Key: RDPKRUr:  [ 0.00  0.00 ]
+Key: RDPMC:  [ 0.00  0.00 ]
+Key: RDPRU:  [ 0.00  0.00 ]
+Key: RDRAND:  [ 0.00  0.00 ]
+Key: RDSEED:  [ 0.00  0.00 ]
+Key: RDSSPD:  [ 0.00  0.00 ]
+Key: RDSSPQ:  [ 0.00  0.00 ]
+Key: RDTSC:  [ 0.00  0.00 ]
+Key: RDTSCP:  [ 0.00  0.00 ]
+Key: REG_SEQUENCE:  [ 0.00  0.00 ]
+Key: REPNE_PREFIX:  [ 0.00  0.00 ]
+Key: REP_MOVSB:  [ 0.00  0.00 ]
+Key: REP_MOVSD:  [ 0.00  0.00 ]
+Key: REP_MOVSQ:  [ 0.00  0.00 ]
+Key: REP_MOVSW:  [ 0.00  0.00 ]
+Key: REP_PREFIX:  [ 0.00  0.00 ]
+Key: REP_STOSB:  [ 0.00  0.00 ]
+Key: REP_STOSD:  [ 0.00  0.00 ]
+Key: REP_STOSQ:  [ 0.00  0.00 ]
+Key: REP_STOSW:  [ 0.00  0.00 ]
+Key: RET:  [ 0.00  0.00 ]
+Key: RETI:  [ 0.00  0.00 ]
+Key: REX:  [ 0.00  0.00 ]
+Key: RMPADJUST:  [ 0.00  0.00 ]
+Key: RMPQUERY:  [ 0.00  0.00 ]
+Key: RMPUPDATE:  [ 0.00  0.00 ]
+Key: ROL:  [ 0.00  0.00 ]
+Key: ROR:  [ 0.00  0.00 ]
+Key: RORX:  [ 0.00  0.00 ]
+Key: ROUNDPDmi:  [ 0.00  0.00 ]
+Key: ROUNDPDri:  [ 0.00  0.00 ]
+Key: ROUNDPSmi:  [ 0.00  0.00 ]
+Key: ROUNDPSri:  [ 0.00  0.00 ]
+Key: ROUNDSDmi:  [ 0.00  0.00 ]
+Key: ROUNDSDmi_Int:  [ 0.00  0.00 ]
+Key: ROUNDSDri:  [ 0.00  0.00 ]
+Key: ROUNDSDri_Int:  [ 0.00  0.00 ]
+Key: ROUNDSSmi:  [ 0.00  0.00 ]
+Key: ROUNDSSmi_Int:  [ 0.00  0.00 ]
+Key: ROUNDSSri:  [ 0.00  0.00 ]
+Key: ROUNDSSri_Int:  [ 0.00  0.00 ]
+Key: RSM:  [ 0.00  0.00 ]
+Key: RSQRTPSm:  [ 0.00  0.00 ]
+Key: RSQRTPSr:  [ 0.00  0.00 ]
+Key: RSQRTSSm:  [ 0.00  0.00 ]
+Key: RSQRTSSm_Int:  [ 0.00  0.00 ]
+Key: RSQRTSSr:  [ 0.00  0.00 ]
+Key: RSQRTSSr_Int:  [ 0.00  0.00 ]
+Key: RSTORSSP:  [ 0.00  0.00 ]
+Key: SAHF:  [ 0.00  0.00 ]
+Key: SALC:  [ 0.00  0.00 ]
+Key: SAR:  [ 0.00  0.00 ]
+Key: SARX:  [ 0.00  0.00 ]
+Key: SAVEPREVSSP:  [ 0.00  0.00 ]
+Key: SBB:  [ 0.00  0.00 ]
+Key: SCASB:  [ 0.00  0.00 ]
+Key: SCASL:  [ 0.00  0.00 ]
+Key: SCASQ:  [ 0.00  0.00 ]
+Key: SCASW:  [ 0.00  0.00 ]
+Key: SEAMCALL:  [ 0.00  0.00 ]
+Key: SEAMOPS:  [ 0.00  0.00 ]
+Key: SEAMRET:  [ 0.00  0.00 ]
+Key: SEG_ALLOCA:  [ 0.00  0.00 ]
+Key: SEH_BeginEpilogue:  [ 0.00  0.00 ]
+Key: SEH_EndEpilogue:  [ 0.00  0.00 ]
+Key: SEH_EndPrologue:  [ 0.00  0.00 ]
+Key: SEH_PushFrame:  [ 0.00  0.00 ]
+Key: SEH_PushReg:  [ 0.00  0.00 ]
+Key: SEH_SaveReg:  [ 0.00  0.00 ]
+Key: SEH_SaveXMM:  [ 0.00  0.00 ]
+Key: SEH_SetFrame:  [ 0.00  0.00 ]
+Key: SEH_StackAlign:  [ 0.00  0.00 ]
+Key: SEH_StackAlloc:  [ 0.00  0.00 ]
+Key: SEH_UnwindV:  [ 0.00  0.00 ]
+Key: SEH_UnwindVersion:  [ 0.00  0.00 ]
+Key: SENDUIPI:  [ 0.00  0.00 ]
+Key: SERIALIZE:  [ 0.00  0.00 ]
+Key: SETB_C:  [ 0.00  0.00 ]
+Key: SETCCm:  [ 0.00  0.00 ]
+Key: SETCCm_EVEX:  [ 0.00  0.00 ]
+Key: SETCCr:  [ 0.00  0.00 ]
+Key: SETCCr_EVEX:  [ 0.00  0.00 ]
+Key: SETSSBSY:  [ 0.00  0.00 ]
+Key: SETZUCCm:  [ 0.00  0.00 ]
+Key: SETZUCCr:  [ 0.00  0.00 ]
+Key: SFENCE:  [ 0.00  0.00 ]
+Key: SGDT:  [ 0.00  0.00 ]
+Key: SHA:  [ 0.00  0.00 ]
+Key: SHL:  [ 0.00  0.00 ]
+Key: SHLD:  [ 0.00  0.00 ]
+Key: SHLDROT:  [ 0.00  0.00 ]
+Key: SHLX:  [ 0.00  0.00 ]
+Key: SHR:  [ 0.00  0.00 ]
+Key: SHRD:  [ 0.00  0.00 ]
+Key: SHRDROT:  [ 0.00  0.00 ]
+Key: SHRX:  [ 0.00  0.00 ]
+Key: SHUFPDrmi:  [ 0.00  0.00 ]
+Key: SHUFPDrri:  [ 0.00  0.00 ]
+Key: SHUFPSrmi:  [ 0.00  0.00 ]
+Key: SHUFPSrri:  [ 0.00  0.00 ]
+Key: SIDT:  [ 0.00  0.00 ]
+Key: SKINIT:  [ 0.00  0.00 ]
+Key: SLDT:  [ 0.00  0.00 ]
+Key: SLWPCB:  [ 0.00  0.00 ]
+Key: SMSW:  [ 0.00  0.00 ]
+Key: SQRTPDm:  [ 0.00  0.00 ]
+Key: SQRTPDr:  [ 0.00  0.00 ]
+Key: SQRTPSm:  [ 0.00  0.00 ]
+Key: SQRTPSr:  [ 0.00  0.00 ]
+Key: SQRTSDm:  [ 0.00  0.00 ]
+Key: SQRTSDm_Int:  [ 0.00  0.00 ]
+Key: SQRTSDr:  [ 0.00  0.00 ]
+Key: SQRTSDr_Int:  [ 0.00  0.00 ]
+Key: SQRTSSm:  [ 0.00  0.00 ]
+Key: SQRTSSm_Int:  [ 0.00  0.00 ]
+Key: SQRTSSr:  [ 0.00  0.00 ]
+Key: SQRTSSr_Int:  [ 0.00  0.00 ]
+Key: SQRT_F:  [ 0.00  0.00 ]
+Key: SQRT_Fp:  [ 0.00  0.00 ]
+Key: SS_PREFIX:  [ 0.00  0.00 ]
+Key: STAC:  [ 0.00  0.00 ]
+Key: STACKALLOC_W_PROBING:  [ 0.00  0.00 ]
+Key: STACKMAP:  [ 0.00  0.00 ]
+Key: STATEPOINT:  [ 0.00  0.00 ]
+Key: STC:  [ 0.00  0.00 ]
+Key: STD:  [ 0.00  0.00 ]
+Key: STGI:  [ 0.00  0.00 ]
+Key: STI:  [ 0.00  0.00 ]
+Key: STMXCSR:  [ 0.00  0.00 ]
+Key: STOSB:  [ 0.00  0.00 ]
+Key: STOSL:  [ 0.00  0.00 ]
+Key: STOSQ:  [ 0.00  0.00 ]
+Key: STOSW:  [ 0.00  0.00 ]
+Key: STR:  [ 0.00  0.00 ]
+Key: STRm:  [ 0.00  0.00 ]
+Key: STTILECFG:  [ 0.00  0.00 ]
+Key: STTILECFG_EVEX:  [ 0.00  0.00 ]
+Key: STUI:  [ 0.00  0.00 ]
+Key: ST_F:  [ 0.00  0.00 ]
+Key: ST_FP:  [ 0.00  0.00 ]
+Key: ST_FPrr:  [ 0.00  0.00 ]
+Key: ST_Fp:  [ 0.00  0.00 ]
+Key: ST_FpP:  [ 0.00  0.00 ]
+Key: ST_Frr:  [ 0.00  0.00 ]
+Key: SUB:  [ 0.00  0.00 ]
+Key: SUBPDrm:  [ 0.00  0.00 ]
+Key: SUBPDrr:  [ 0.00  0.00 ]
+Key: SUBPSrm:  [ 0.00  0.00 ]
+Key: SUBPSrr:  [ 0.00  0.00 ]
+Key: SUBREG_TO_REG:  [ 0.00  0.00 ]
+Key: SUBR_F:  [ 0.00  0.00 ]
+Key: SUBR_FI:  [ 0.00  0.00 ]
+Key: SUBR_FPrST:  [ 0.00  0.00 ]
+Key: SUBR_FST:  [ 0.00  0.00 ]
+Key: SUBR_Fp:  [ 0.00  0.00 ]
+Key: SUBR_FpI:  [ 0.00  0.00 ]
+Key: SUBR_FrST:  [ 0.00  0.00 ]
+Key: SUBSDrm:  [ 0.00  0.00 ]
+Key: SUBSDrm_Int:  [ 0.00  0.00 ]
+Key: SUBSDrr:  [ 0.00  0.00 ]
+Key: SUBSDrr_Int:  [ 0.00  0.00 ]
+Key: SUBSSrm:  [ 0.00  0.00 ]
+Key: SUBSSrm_Int:  [ 0.00  0.00 ]
+Key: SUBSSrr:  [ 0.00  0.00 ]
+Key: SUBSSrr_Int:  [ 0.00  0.00 ]
+Key: SUB_F:  [ 0.00  0.00 ]
+Key: SUB_FI:  [ 0.00  0.00 ]
+Key: SUB_FPrST:  [ 0.00  0.00 ]
+Key: SUB_FST:  [ 0.00  0.00 ]
+Key: SUB_Fp:  [ 0.00  0.00 ]
+Key: SUB_FpI:  [ 0.00  0.00 ]
+Key: SUB_FrST:  [ 0.00  0.00 ]
+Key: SWAPGS:  [ 0.00  0.00 ]
+Key: SYSCALL:  [ 0.00  0.00 ]
+Key: SYSENTER:  [ 0.00  0.00 ]
+Key: SYSEXIT:  [ 0.00  0.00 ]
+Key: SYSRET:  [ 0.00  0.00 ]
+Key: T:  [ 0.00  0.00 ]
+Key: TAILJMPd:  [ 0.00  0.00 ]
+Key: TAILJMPd_CC:  [ 0.00  0.00 ]
+Key: TAILJMPm:  [ 0.00  0.00 ]
+Key: TAILJMPr:  [ 0.00  0.00 ]
+Key: TCMMIMFP:  [ 0.00  0.00 ]
+Key: TCMMRLFP:  [ 0.00  0.00 ]
+Key: TCONJTCMMIMFP:  [ 0.00  0.00 ]
+Key: TCONJTFP:  [ 0.00  0.00 ]
+Key: TCRETURN_HIPE:  [ 0.00  0.00 ]
+Key: TCRETURN_WIN:  [ 0.00  0.00 ]
+Key: TCRETURN_WINmi:  [ 0.00  0.00 ]
+Key: TCRETURNdi:  [ 0.00  0.00 ]
+Key: TCRETURNdicc:  [ 0.00  0.00 ]
+Key: TCRETURNmi:  [ 0.00  0.00 ]
+Key: TCRETURNri:  [ 0.00  0.00 ]
+Key: TCVTROWD:  [ 0.00  0.00 ]
+Key: TCVTROWPS:  [ 0.00  0.00 ]
+Key: TDCALL:  [ 0.00  0.00 ]
+Key: TDPBF:  [ 0.00  0.00 ]
+Key: TDPBHF:  [ 0.00  0.00 ]
+Key: TDPBSSD:  [ 0.00  0.00 ]
+Key: TDPBSUD:  [ 0.00  0.00 ]
+Key: TDPBUSD:  [ 0.00  0.00 ]
+Key: TDPBUUD:  [ 0.00  0.00 ]
+Key: TDPFP:  [ 0.00  0.00 ]
+Key: TDPHBF:  [ 0.00  0.00 ]
+Key: TDPHF:  [ 0.00  0.00 ]
+Key: TEST:  [ 0.00  0.00 ]
+Key: TESTUI:  [ 0.00  0.00 ]
+Key: TILELOADD:  [ 0.00  0.00 ]
+Key: TILELOADDRS:  [ 0.00  0.00 ]
+Key: TILELOADDRST:  [ 0.00  0.00 ]
+Key: TILELOADDRS_EVEX:  [ 0.00  0.00 ]
+Key: TILELOADDT:  [ 0.00  0.00 ]
+Key: TILELOADD_EVEX:  [ 0.00  0.00 ]
+Key: TILEMOVROWrre:  [ 0.00  0.00 ]
+Key: TILEMOVROWrri:  [ 0.00  0.00 ]
+Key: TILERELEASE:  [ 0.00  0.00 ]
+Key: TILESTORED:  [ 0.00  0.00 ]
+Key: TILESTORED_EVEX:  [ 0.00  0.00 ]
+Key: TILEZERO:  [ 0.00  0.00 ]
+Key: TLBSYNC:  [ 0.00  0.00 ]
+Key: TLSCall:  [ 0.00  0.00 ]
+Key: TLS_addr:  [ 0.00  0.00 ]
+Key: TLS_addrX:  [ 0.00  0.00 ]
+Key: TLS_base_addr:  [ 0.00  0.00 ]
+Key: TLS_base_addrX:  [ 0.00  0.00 ]
+Key: TLS_desc:  [ 0.00  0.00 ]
+Key: TMMULTF:  [ 0.00  0.00 ]
+Key: TPAUSE:  [ 0.00  0.00 ]
+Key: TRAP:  [ 0.00  0.00 ]
+Key: TST_F:  [ 0.00  0.00 ]
+Key: TST_Fp:  [ 0.00  0.00 ]
+Key: TTCMMIMFP:  [ 0.00  0.00 ]
+Key: TTCMMRLFP:  [ 0.00  0.00 ]
+Key: TTDPBF:  [ 0.00  0.00 ]
+Key: TTDPFP:  [ 0.00  0.00 ]
+Key: TTMMULTF:  [ 0.00  0.00 ]
+Key: TTRANSPOSED:  [ 0.00  0.00 ]
+Key: TZCNT:  [ 0.00  0.00 ]
+Key: TZMSK:  [ 0.00  0.00 ]
+Key: UBSAN_UD:  [ 0.00  0.00 ]
+Key: UCOMISDrm:  [ 0.00  0.00 ]
+Key: UCOMISDrm_Int:  [ 0.00  0.00 ]
+Key: UCOMISDrr:  [ 0.00  0.00 ]
+Key: UCOMISDrr_Int:  [ 0.00  0.00 ]
+Key: UCOMISSrm:  [ 0.00  0.00 ]
+Key: UCOMISSrm_Int:  [ 0.00  0.00 ]
+Key: UCOMISSrr:  [ 0.00  0.00 ]
+Key: UCOMISSrr_Int:  [ 0.00  0.00 ]
+Key: UCOM_FIPr:  [ 0.00  0.00 ]
+Key: UCOM_FIr:  [ 0.00  0.00 ]
+Key: UCOM_FPPr:  [ 0.00  0.00 ]
+Key: UCOM_FPr:  [ 0.00  0.00 ]
+Key: UCOM_FpIr:  [ 0.00  0.00 ]
+Key: UCOM_Fpr:  [ 0.00  0.00 ]
+Key: UCOM_Fr:  [ 0.00  0.00 ]
+Key: UD:  [ 0.00  0.00 ]
+Key: UIRET:  [ 0.00  0.00 ]
+Key: UMONITOR:  [ 0.00  0.00 ]
+Key: UMWAIT:  [ 0.00  0.00 ]
+Key: UNPCKHPDrm:  [ 0.00  0.00 ]
+Key: UNPCKHPDrr:  [ 0.00  0.00 ]
+Key: UNPCKHPSrm:  [ 0.00  0.00 ]
+Key: UNPCKHPSrr:  [ 0.00  0.00 ]
+Key: UNPCKLPDrm:  [ 0.00  0.00 ]
+Key: UNPCKLPDrr:  [ 0.00  0.00 ]
+Key: UNPCKLPSrm:  [ 0.00  0.00 ]
+Key: UNPCKLPSrr:  [ 0.00  0.00 ]
+Key: URDMSRri:  [ 0.00  0.00 ]
+Key: URDMSRri_EVEX:  [ 0.00  0.00 ]
+Key: URDMSRrr:  [ 0.00  0.00 ]
+Key: URDMSRrr_EVEX:  [ 0.00  0.00 ]
+Key: UWRMSRir:  [ 0.00  0.00 ]
+Key: UWRMSRir_EVEX:  [ 0.00  0.00 ]
+Key: UWRMSRrr:  [ 0.00  0.00 ]
+Key: UWRMSRrr_EVEX:  [ 0.00  0.00 ]
+Key: V:  [ 0.00  0.00 ]
+Key: VAARG:  [ 0.00  0.00 ]
+Key: VAARG_X:  [ 0.00  0.00 ]
+Key: VADDBF:  [ 0.00  0.00 ]
+Key: VADDPDYrm:  [ 0.00  0.00 ]
+Key: VADDPDYrr:  [ 0.00  0.00 ]
+Key: VADDPDZ:  [ 0.00  0.00 ]
+Key: VADDPDZrm:  [ 0.00  0.00 ]
+Key: VADDPDZrmb:  [ 0.00  0.00 ]
+Key: VADDPDZrmbk:  [ 0.00  0.00 ]
+Key: VADDPDZrmbkz:  [ 0.00  0.00 ]
+Key: VADDPDZrmk:  [ 0.00  0.00 ]
+Key: VADDPDZrmkz:  [ 0.00  0.00 ]
+Key: VADDPDZrr:  [ 0.00  0.00 ]
+Key: VADDPDZrrb:  [ 0.00  0.00 ]
+Key: VADDPDZrrbk:  [ 0.00  0.00 ]
+Key: VADDPDZrrbkz:  [ 0.00  0.00 ]
+Key: VADDPDZrrk:  [ 0.00  0.00 ]
+Key: VADDPDZrrkz:  [ 0.00  0.00 ]
+Key: VADDPDrm:  [ 0.00  0.00 ]
+Key: VADDPDrr:  [ 0.00  0.00 ]
+Key: VADDPHZ:  [ 0.00  0.00 ]
+Key: VADDPHZrm:  [ 0.00  0.00 ]
+Key: VADDPHZrmb:  [ 0.00  0.00 ]
+Key: VADDPHZrmbk:  [ 0.00  0.00 ]
+Key: VADDPHZrmbkz:  [ 0.00  0.00 ]
+Key: VADDPHZrmk:  [ 0.00  0.00 ]
+Key: VADDPHZrmkz:  [ 0.00  0.00 ]
+Key: VADDPHZrr:  [ 0.00  0.00 ]
+Key: VADDPHZrrb:  [ 0.00  0.00 ]
+Key: VADDPHZrrbk:  [ 0.00  0.00 ]
+Key: VADDPHZrrbkz:  [ 0.00  0.00 ]
+Key: VADDPHZrrk:  [ 0.00  0.00 ]
+Key: VADDPHZrrkz:  [ 0.00  0.00 ]
+Key: VADDPSYrm:  [ 0.00  0.00 ]
+Key: VADDPSYrr:  [ 0.00  0.00 ]
+Key: VADDPSZ:  [ 0.00  0.00 ]
+Key: VADDPSZrm:  [ 0.00  0.00 ]
+Key: VADDPSZrmb:  [ 0.00  0.00 ]
+Key: VADDPSZrmbk:  [ 0.00  0.00 ]
+Key: VADDPSZrmbkz:  [ 0.00  0.00 ]
+Key: VADDPSZrmk:  [ 0.00  0.00 ]
+Key: VADDPSZrmkz:  [ 0.00  0.00 ]
+Key: VADDPSZrr:  [ 0.00  0.00 ]
+Key: VADDPSZrrb:  [ 0.00  0.00 ]
+Key: VADDPSZrrbk:  [ 0.00  0.00 ]
+Key: VADDPSZrrbkz:  [ 0.00  0.00 ]
+Key: VADDPSZrrk:  [ 0.00  0.00 ]
+Key: VADDPSZrrkz:  [ 0.00  0.00 ]
+Key: VADDPSrm:  [ 0.00  0.00 ]
+Key: VADDPSrr:  [ 0.00  0.00 ]
+Key: VADDSDZrm:  [ 0.00  0.00 ]
+Key: VADDSDZrm_Int:  [ 0.00  0.00 ]
+Key: VADDSDZrmk_Int:  [ 0.00  0.00 ]
+Key: VADDSDZrmkz_Int:  [ 0.00  0.00 ]
+Key: VADDSDZrr:  [ 0.00  0.00 ]
+Key: VADDSDZrr_Int:  [ 0.00  0.00 ]
+Key: VADDSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VADDSDZrrbk_Int:  [ 0.00  0.00 ]
+Key: VADDSDZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VADDSDZrrk_Int:  [ 0.00  0.00 ]
+Key: VADDSDZrrkz_Int:  [ 0.00  0.00 ]
+Key: VADDSDrm:  [ 0.00  0.00 ]
+Key: VADDSDrm_Int:  [ 0.00  0.00 ]
+Key: VADDSDrr:  [ 0.00  0.00 ]
+Key: VADDSDrr_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrm:  [ 0.00  0.00 ]
+Key: VADDSHZrm_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrmk_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrmkz_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrr:  [ 0.00  0.00 ]
+Key: VADDSHZrr_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrrbk_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrrk_Int:  [ 0.00  0.00 ]
+Key: VADDSHZrrkz_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrm:  [ 0.00  0.00 ]
+Key: VADDSSZrm_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrmk_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrmkz_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrr:  [ 0.00  0.00 ]
+Key: VADDSSZrr_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrrbk_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrrk_Int:  [ 0.00  0.00 ]
+Key: VADDSSZrrkz_Int:  [ 0.00  0.00 ]
+Key: VADDSSrm:  [ 0.00  0.00 ]
+Key: VADDSSrm_Int:  [ 0.00  0.00 ]
+Key: VADDSSrr:  [ 0.00  0.00 ]
+Key: VADDSSrr_Int:  [ 0.00  0.00 ]
+Key: VADDSUBPDYrm:  [ 0.00  0.00 ]
+Key: VADDSUBPDYrr:  [ 0.00  0.00 ]
+Key: VADDSUBPDrm:  [ 0.00  0.00 ]
+Key: VADDSUBPDrr:  [ 0.00  0.00 ]
+Key: VADDSUBPSYrm:  [ 0.00  0.00 ]
+Key: VADDSUBPSYrr:  [ 0.00  0.00 ]
+Key: VADDSUBPSrm:  [ 0.00  0.00 ]
+Key: VADDSUBPSrr:  [ 0.00  0.00 ]
+Key: VAESDECLASTYrm:  [ 0.00  0.00 ]
+Key: VAESDECLASTYrr:  [ 0.00  0.00 ]
+Key: VAESDECLASTZ:  [ 0.00  0.00 ]
+Key: VAESDECLASTZrm:  [ 0.00  0.00 ]
+Key: VAESDECLASTZrr:  [ 0.00  0.00 ]
+Key: VAESDECLASTrm:  [ 0.00  0.00 ]
+Key: VAESDECLASTrr:  [ 0.00  0.00 ]
+Key: VAESDECYrm:  [ 0.00  0.00 ]
+Key: VAESDECYrr:  [ 0.00  0.00 ]
+Key: VAESDECZ:  [ 0.00  0.00 ]
+Key: VAESDECZrm:  [ 0.00  0.00 ]
+Key: VAESDECZrr:  [ 0.00  0.00 ]
+Key: VAESDECrm:  [ 0.00  0.00 ]
+Key: VAESDECrr:  [ 0.00  0.00 ]
+Key: VAESENCLASTYrm:  [ 0.00  0.00 ]
+Key: VAESENCLASTYrr:  [ 0.00  0.00 ]
+Key: VAESENCLASTZ:  [ 0.00  0.00 ]
+Key: VAESENCLASTZrm:  [ 0.00  0.00 ]
+Key: VAESENCLASTZrr:  [ 0.00  0.00 ]
+Key: VAESENCLASTrm:  [ 0.00  0.00 ]
+Key: VAESENCLASTrr:  [ 0.00  0.00 ]
+Key: VAESENCYrm:  [ 0.00  0.00 ]
+Key: VAESENCYrr:  [ 0.00  0.00 ]
+Key: VAESENCZ:  [ 0.00  0.00 ]
+Key: VAESENCZrm:  [ 0.00  0.00 ]
+Key: VAESENCZrr:  [ 0.00  0.00 ]
+Key: VAESENCrm:  [ 0.00  0.00 ]
+Key: VAESENCrr:  [ 0.00  0.00 ]
+Key: VAESIMCrm:  [ 0.00  0.00 ]
+Key: VAESIMCrr:  [ 0.00  0.00 ]
+Key: VAESKEYGENASSISTrmi:  [ 0.00  0.00 ]
+Key: VAESKEYGENASSISTrri:  [ 0.00  0.00 ]
+Key: VALIGNDZ:  [ 0.00  0.00 ]
+Key: VALIGNDZrmbi:  [ 0.00  0.00 ]
+Key: VALIGNDZrmbik:  [ 0.00  0.00 ]
+Key: VALIGNDZrmbikz:  [ 0.00  0.00 ]
+Key: VALIGNDZrmi:  [ 0.00  0.00 ]
+Key: VALIGNDZrmik:  [ 0.00  0.00 ]
+Key: VALIGNDZrmikz:  [ 0.00  0.00 ]
+Key: VALIGNDZrri:  [ 0.00  0.00 ]
+Key: VALIGNDZrrik:  [ 0.00  0.00 ]
+Key: VALIGNDZrrikz:  [ 0.00  0.00 ]
+Key: VALIGNQZ:  [ 0.00  0.00 ]
+Key: VALIGNQZrmbi:  [ 0.00  0.00 ]
+Key: VALIGNQZrmbik:  [ 0.00  0.00 ]
+Key: VALIGNQZrmbikz:  [ 0.00  0.00 ]
+Key: VALIGNQZrmi:  [ 0.00  0.00 ]
+Key: VALIGNQZrmik:  [ 0.00  0.00 ]
+Key: VALIGNQZrmikz:  [ 0.00  0.00 ]
+Key: VALIGNQZrri:  [ 0.00  0.00 ]
+Key: VALIGNQZrrik:  [ 0.00  0.00 ]
+Key: VALIGNQZrrikz:  [ 0.00  0.00 ]
+Key: VANDNPDYrm:  [ 0.00  0.00 ]
+Key: VANDNPDYrr:  [ 0.00  0.00 ]
+Key: VANDNPDZ:  [ 0.00  0.00 ]
+Key: VANDNPDZrm:  [ 0.00  0.00 ]
+Key: VANDNPDZrmb:  [ 0.00  0.00 ]
+Key: VANDNPDZrmbk:  [ 0.00  0.00 ]
+Key: VANDNPDZrmbkz:  [ 0.00  0.00 ]
+Key: VANDNPDZrmk:  [ 0.00  0.00 ]
+Key: VANDNPDZrmkz:  [ 0.00  0.00 ]
+Key: VANDNPDZrr:  [ 0.00  0.00 ]
+Key: VANDNPDZrrk:  [ 0.00  0.00 ]
+Key: VANDNPDZrrkz:  [ 0.00  0.00 ]
+Key: VANDNPDrm:  [ 0.00  0.00 ]
+Key: VANDNPDrr:  [ 0.00  0.00 ]
+Key: VANDNPSYrm:  [ 0.00  0.00 ]
+Key: VANDNPSYrr:  [ 0.00  0.00 ]
+Key: VANDNPSZ:  [ 0.00  0.00 ]
+Key: VANDNPSZrm:  [ 0.00  0.00 ]
+Key: VANDNPSZrmb:  [ 0.00  0.00 ]
+Key: VANDNPSZrmbk:  [ 0.00  0.00 ]
+Key: VANDNPSZrmbkz:  [ 0.00  0.00 ]
+Key: VANDNPSZrmk:  [ 0.00  0.00 ]
+Key: VANDNPSZrmkz:  [ 0.00  0.00 ]
+Key: VANDNPSZrr:  [ 0.00  0.00 ]
+Key: VANDNPSZrrk:  [ 0.00  0.00 ]
+Key: VANDNPSZrrkz:  [ 0.00  0.00 ]
+Key: VANDNPSrm:  [ 0.00  0.00 ]
+Key: VANDNPSrr:  [ 0.00  0.00 ]
+Key: VANDPDYrm:  [ 0.00  0.00 ]
+Key: VANDPDYrr:  [ 0.00  0.00 ]
+Key: VANDPDZ:  [ 0.00  0.00 ]
+Key: VANDPDZrm:  [ 0.00  0.00 ]
+Key: VANDPDZrmb:  [ 0.00  0.00 ]
+Key: VANDPDZrmbk:  [ 0.00  0.00 ]
+Key: VANDPDZrmbkz:  [ 0.00  0.00 ]
+Key: VANDPDZrmk:  [ 0.00  0.00 ]
+Key: VANDPDZrmkz:  [ 0.00  0.00 ]
+Key: VANDPDZrr:  [ 0.00  0.00 ]
+Key: VANDPDZrrk:  [ 0.00  0.00 ]
+Key: VANDPDZrrkz:  [ 0.00  0.00 ]
+Key: VANDPDrm:  [ 0.00  0.00 ]
+Key: VANDPDrr:  [ 0.00  0.00 ]
+Key: VANDPSYrm:  [ 0.00  0.00 ]
+Key: VANDPSYrr:  [ 0.00  0.00 ]
+Key: VANDPSZ:  [ 0.00  0.00 ]
+Key: VANDPSZrm:  [ 0.00  0.00 ]
+Key: VANDPSZrmb:  [ 0.00  0.00 ]
+Key: VANDPSZrmbk:  [ 0.00  0.00 ]
+Key: VANDPSZrmbkz:  [ 0.00  0.00 ]
+Key: VANDPSZrmk:  [ 0.00  0.00 ]
+Key: VANDPSZrmkz:  [ 0.00  0.00 ]
+Key: VANDPSZrr:  [ 0.00  0.00 ]
+Key: VANDPSZrrk:  [ 0.00  0.00 ]
+Key: VANDPSZrrkz:  [ 0.00  0.00 ]
+Key: VANDPSrm:  [ 0.00  0.00 ]
+Key: VANDPSrr:  [ 0.00  0.00 ]
+Key: VASTART_SAVE_XMM_REGS:  [ 0.00  0.00 ]
+Key: VBCSTNEBF:  [ 0.00  0.00 ]
+Key: VBCSTNESH:  [ 0.00  0.00 ]
+Key: VBLENDMPDZ:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrm:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrmb:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrmbk:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrmbkz:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrmk:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrmkz:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrr:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrrk:  [ 0.00  0.00 ]
+Key: VBLENDMPDZrrkz:  [ 0.00  0.00 ]
+Key: VBLENDMPSZ:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrm:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrmb:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrmbk:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrmbkz:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrmk:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrmkz:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrr:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrrk:  [ 0.00  0.00 ]
+Key: VBLENDMPSZrrkz:  [ 0.00  0.00 ]
+Key: VBLENDPDYrmi:  [ 0.00  0.00 ]
+Key: VBLENDPDYrri:  [ 0.00  0.00 ]
+Key: VBLENDPDrmi:  [ 0.00  0.00 ]
+Key: VBLENDPDrri:  [ 0.00  0.00 ]
+Key: VBLENDPSYrmi:  [ 0.00  0.00 ]
+Key: VBLENDPSYrri:  [ 0.00  0.00 ]
+Key: VBLENDPSrmi:  [ 0.00  0.00 ]
+Key: VBLENDPSrri:  [ 0.00  0.00 ]
+Key: VBLENDVPDYrmr:  [ 0.00  0.00 ]
+Key: VBLENDVPDYrrr:  [ 0.00  0.00 ]
+Key: VBLENDVPDrmr:  [ 0.00  0.00 ]
+Key: VBLENDVPDrrr:  [ 0.00  0.00 ]
+Key: VBLENDVPSYrmr:  [ 0.00  0.00 ]
+Key: VBLENDVPSYrrr:  [ 0.00  0.00 ]
+Key: VBLENDVPSrmr:  [ 0.00  0.00 ]
+Key: VBLENDVPSrrr:  [ 0.00  0.00 ]
+Key: VBROADCASTF:  [ 0.00  0.00 ]
+Key: VBROADCASTI:  [ 0.00  0.00 ]
+Key: VBROADCASTSDYrm:  [ 0.00  0.00 ]
+Key: VBROADCASTSDYrr:  [ 0.00  0.00 ]
+Key: VBROADCASTSDZ:  [ 0.00  0.00 ]
+Key: VBROADCASTSDZrm:  [ 0.00  0.00 ]
+Key: VBROADCASTSDZrmk:  [ 0.00  0.00 ]
+Key: VBROADCASTSDZrmkz:  [ 0.00  0.00 ]
+Key: VBROADCASTSDZrr:  [ 0.00  0.00 ]
+Key: VBROADCASTSDZrrk:  [ 0.00  0.00 ]
+Key: VBROADCASTSDZrrkz:  [ 0.00  0.00 ]
+Key: VBROADCASTSSYrm:  [ 0.00  0.00 ]
+Key: VBROADCASTSSYrr:  [ 0.00  0.00 ]
+Key: VBROADCASTSSZ:  [ 0.00  0.00 ]
+Key: VBROADCASTSSZrm:  [ 0.00  0.00 ]
+Key: VBROADCASTSSZrmk:  [ 0.00  0.00 ]
+Key: VBROADCASTSSZrmkz:  [ 0.00  0.00 ]
+Key: VBROADCASTSSZrr:  [ 0.00  0.00 ]
+Key: VBROADCASTSSZrrk:  [ 0.00  0.00 ]
+Key: VBROADCASTSSZrrkz:  [ 0.00  0.00 ]
+Key: VBROADCASTSSrm:  [ 0.00  0.00 ]
+Key: VBROADCASTSSrr:  [ 0.00  0.00 ]
+Key: VCMPBF:  [ 0.00  0.00 ]
+Key: VCMPPDYrmi:  [ 0.00  0.00 ]
+Key: VCMPPDYrri:  [ 0.00  0.00 ]
+Key: VCMPPDZ:  [ 0.00  0.00 ]
+Key: VCMPPDZrmbi:  [ 0.00  0.00 ]
+Key: VCMPPDZrmbik:  [ 0.00  0.00 ]
+Key: VCMPPDZrmi:  [ 0.00  0.00 ]
+Key: VCMPPDZrmik:  [ 0.00  0.00 ]
+Key: VCMPPDZrri:  [ 0.00  0.00 ]
+Key: VCMPPDZrrib:  [ 0.00  0.00 ]
+Key: VCMPPDZrribk:  [ 0.00  0.00 ]
+Key: VCMPPDZrrik:  [ 0.00  0.00 ]
+Key: VCMPPDrmi:  [ 0.00  0.00 ]
+Key: VCMPPDrri:  [ 0.00  0.00 ]
+Key: VCMPPHZ:  [ 0.00  0.00 ]
+Key: VCMPPHZrmbi:  [ 0.00  0.00 ]
+Key: VCMPPHZrmbik:  [ 0.00  0.00 ]
+Key: VCMPPHZrmi:  [ 0.00  0.00 ]
+Key: VCMPPHZrmik:  [ 0.00  0.00 ]
+Key: VCMPPHZrri:  [ 0.00  0.00 ]
+Key: VCMPPHZrrib:  [ 0.00  0.00 ]
+Key: VCMPPHZrribk:  [ 0.00  0.00 ]
+Key: VCMPPHZrrik:  [ 0.00  0.00 ]
+Key: VCMPPSYrmi:  [ 0.00  0.00 ]
+Key: VCMPPSYrri:  [ 0.00  0.00 ]
+Key: VCMPPSZ:  [ 0.00  0.00 ]
+Key: VCMPPSZrmbi:  [ 0.00  0.00 ]
+Key: VCMPPSZrmbik:  [ 0.00  0.00 ]
+Key: VCMPPSZrmi:  [ 0.00  0.00 ]
+Key: VCMPPSZrmik:  [ 0.00  0.00 ]
+Key: VCMPPSZrri:  [ 0.00  0.00 ]
+Key: VCMPPSZrrib:  [ 0.00  0.00 ]
+Key: VCMPPSZrribk:  [ 0.00  0.00 ]
+Key: VCMPPSZrrik:  [ 0.00  0.00 ]
+Key: VCMPPSrmi:  [ 0.00  0.00 ]
+Key: VCMPPSrri:  [ 0.00  0.00 ]
+Key: VCMPSDZrmi:  [ 0.00  0.00 ]
+Key: VCMPSDZrmi_Int:  [ 0.00  0.00 ]
+Key: VCMPSDZrmik_Int:  [ 0.00  0.00 ]
+Key: VCMPSDZrri:  [ 0.00  0.00 ]
+Key: VCMPSDZrri_Int:  [ 0.00  0.00 ]
+Key: VCMPSDZrrib_Int:  [ 0.00  0.00 ]
+Key: VCMPSDZrribk_Int:  [ 0.00  0.00 ]
+Key: VCMPSDZrrik_Int:  [ 0.00  0.00 ]
+Key: VCMPSDrmi:  [ 0.00  0.00 ]
+Key: VCMPSDrmi_Int:  [ 0.00  0.00 ]
+Key: VCMPSDrri:  [ 0.00  0.00 ]
+Key: VCMPSDrri_Int:  [ 0.00  0.00 ]
+Key: VCMPSHZrmi:  [ 0.00  0.00 ]
+Key: VCMPSHZrmi_Int:  [ 0.00  0.00 ]
+Key: VCMPSHZrmik_Int:  [ 0.00  0.00 ]
+Key: VCMPSHZrri:  [ 0.00  0.00 ]
+Key: VCMPSHZrri_Int:  [ 0.00  0.00 ]
+Key: VCMPSHZrrib_Int:  [ 0.00  0.00 ]
+Key: VCMPSHZrribk_Int:  [ 0.00  0.00 ]
+Key: VCMPSHZrrik_Int:  [ 0.00  0.00 ]
+Key: VCMPSSZrmi:  [ 0.00  0.00 ]
+Key: VCMPSSZrmi_Int:  [ 0.00  0.00 ]
+Key: VCMPSSZrmik_Int:  [ 0.00  0.00 ]
+Key: VCMPSSZrri:  [ 0.00  0.00 ]
+Key: VCMPSSZrri_Int:  [ 0.00  0.00 ]
+Key: VCMPSSZrrib_Int:  [ 0.00  0.00 ]
+Key: VCMPSSZrribk_Int:  [ 0.00  0.00 ]
+Key: VCMPSSZrrik_Int:  [ 0.00  0.00 ]
+Key: VCMPSSrmi:  [ 0.00  0.00 ]
+Key: VCMPSSrmi_Int:  [ 0.00  0.00 ]
+Key: VCMPSSrri:  [ 0.00  0.00 ]
+Key: VCMPSSrri_Int:  [ 0.00  0.00 ]
+Key: VCOMISBF:  [ 0.00  0.00 ]
+Key: VCOMISDZrm:  [ 0.00  0.00 ]
+Key: VCOMISDZrm_Int:  [ 0.00  0.00 ]
+Key: VCOMISDZrr:  [ 0.00  0.00 ]
+Key: VCOMISDZrr_Int:  [ 0.00  0.00 ]
+Key: VCOMISDZrrb:  [ 0.00  0.00 ]
+Key: VCOMISDrm:  [ 0.00  0.00 ]
+Key: VCOMISDrm_Int:  [ 0.00  0.00 ]
+Key: VCOMISDrr:  [ 0.00  0.00 ]
+Key: VCOMISDrr_Int:  [ 0.00  0.00 ]
+Key: VCOMISHZrm:  [ 0.00  0.00 ]
+Key: VCOMISHZrm_Int:  [ 0.00  0.00 ]
+Key: VCOMISHZrr:  [ 0.00  0.00 ]
+Key: VCOMISHZrr_Int:  [ 0.00  0.00 ]
+Key: VCOMISHZrrb:  [ 0.00  0.00 ]
+Key: VCOMISSZrm:  [ 0.00  0.00 ]
+Key: VCOMISSZrm_Int:  [ 0.00  0.00 ]
+Key: VCOMISSZrr:  [ 0.00  0.00 ]
+Key: VCOMISSZrr_Int:  [ 0.00  0.00 ]
+Key: VCOMISSZrrb:  [ 0.00  0.00 ]
+Key: VCOMISSrm:  [ 0.00  0.00 ]
+Key: VCOMISSrm_Int:  [ 0.00  0.00 ]
+Key: VCOMISSrr:  [ 0.00  0.00 ]
+Key: VCOMISSrr_Int:  [ 0.00  0.00 ]
+Key: VCOMPRESSPDZ:  [ 0.00  0.00 ]
+Key: VCOMPRESSPDZmr:  [ 0.00  0.00 ]
+Key: VCOMPRESSPDZmrk:  [ 0.00  0.00 ]
+Key: VCOMPRESSPDZrr:  [ 0.00  0.00 ]
+Key: VCOMPRESSPDZrrk:  [ 0.00  0.00 ]
+Key: VCOMPRESSPDZrrkz:  [ 0.00  0.00 ]
+Key: VCOMPRESSPSZ:  [ 0.00  0.00 ]
+Key: VCOMPRESSPSZmr:  [ 0.00  0.00 ]
+Key: VCOMPRESSPSZmrk:  [ 0.00  0.00 ]
+Key: VCOMPRESSPSZrr:  [ 0.00  0.00 ]
+Key: VCOMPRESSPSZrrk:  [ 0.00  0.00 ]
+Key: VCOMPRESSPSZrrkz:  [ 0.00  0.00 ]
+Key: VCOMXSDZrm_Int:  [ 0.00  0.00 ]
+Key: VCOMXSDZrr_Int:  [ 0.00  0.00 ]
+Key: VCOMXSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VCOMXSHZrm_Int:  [ 0.00  0.00 ]
+Key: VCOMXSHZrr_Int:  [ 0.00  0.00 ]
+Key: VCOMXSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VCOMXSSZrm_Int:  [ 0.00  0.00 ]
+Key: VCOMXSSZrr_Int:  [ 0.00  0.00 ]
+Key: VCOMXSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VCVT:  [ 0.00  0.00 ]
+Key: VCVTBF:  [ 0.00  0.00 ]
+Key: VCVTBIASPH:  [ 0.00  0.00 ]
+Key: VCVTDQ:  [ 0.00  0.00 ]
+Key: VCVTHF:  [ 0.00  0.00 ]
+Key: VCVTNE:  [ 0.00  0.00 ]
+Key: VCVTNEEBF:  [ 0.00  0.00 ]
+Key: VCVTNEEPH:  [ 0.00  0.00 ]
+Key: VCVTNEOBF:  [ 0.00  0.00 ]
+Key: VCVTNEOPH:  [ 0.00  0.00 ]
+Key: VCVTNEPS:  [ 0.00  0.00 ]
+Key: VCVTPD:  [ 0.00  0.00 ]
+Key: VCVTPH:  [ 0.00  0.00 ]
+Key: VCVTPS:  [ 0.00  0.00 ]
+Key: VCVTQQ:  [ 0.00  0.00 ]
+Key: VCVTSD:  [ 0.00  0.00 ]
+Key: VCVTSH:  [ 0.00  0.00 ]
+Key: VCVTSI:  [ 0.00  0.00 ]
+Key: VCVTSS:  [ 0.00  0.00 ]
+Key: VCVTTBF:  [ 0.00  0.00 ]
+Key: VCVTTPD:  [ 0.00  0.00 ]
+Key: VCVTTPH:  [ 0.00  0.00 ]
+Key: VCVTTPS:  [ 0.00  0.00 ]
+Key: VCVTTSD:  [ 0.00  0.00 ]
+Key: VCVTTSH:  [ 0.00  0.00 ]
+Key: VCVTTSS:  [ 0.00  0.00 ]
+Key: VCVTUDQ:  [ 0.00  0.00 ]
+Key: VCVTUQQ:  [ 0.00  0.00 ]
+Key: VCVTUSI:  [ 0.00  0.00 ]
+Key: VCVTUW:  [ 0.00  0.00 ]
+Key: VCVTW:  [ 0.00  0.00 ]
+Key: VDBPSADBWZ:  [ 0.00  0.00 ]
+Key: VDBPSADBWZrmi:  [ 0.00  0.00 ]
+Key: VDBPSADBWZrmik:  [ 0.00  0.00 ]
+Key: VDBPSADBWZrmikz:  [ 0.00  0.00 ]
+Key: VDBPSADBWZrri:  [ 0.00  0.00 ]
+Key: VDBPSADBWZrrik:  [ 0.00  0.00 ]
+Key: VDBPSADBWZrrikz:  [ 0.00  0.00 ]
+Key: VDIVBF:  [ 0.00  0.00 ]
+Key: VDIVPDYrm:  [ 0.00  0.00 ]
+Key: VDIVPDYrr:  [ 0.00  0.00 ]
+Key: VDIVPDZ:  [ 0.00  0.00 ]
+Key: VDIVPDZrm:  [ 0.00  0.00 ]
+Key: VDIVPDZrmb:  [ 0.00  0.00 ]
+Key: VDIVPDZrmbk:  [ 0.00  0.00 ]
+Key: VDIVPDZrmbkz:  [ 0.00  0.00 ]
+Key: VDIVPDZrmk:  [ 0.00  0.00 ]
+Key: VDIVPDZrmkz:  [ 0.00  0.00 ]
+Key: VDIVPDZrr:  [ 0.00  0.00 ]
+Key: VDIVPDZrrb:  [ 0.00  0.00 ]
+Key: VDIVPDZrrbk:  [ 0.00  0.00 ]
+Key: VDIVPDZrrbkz:  [ 0.00  0.00 ]
+Key: VDIVPDZrrk:  [ 0.00  0.00 ]
+Key: VDIVPDZrrkz:  [ 0.00  0.00 ]
+Key: VDIVPDrm:  [ 0.00  0.00 ]
+Key: VDIVPDrr:  [ 0.00  0.00 ]
+Key: VDIVPHZ:  [ 0.00  0.00 ]
+Key: VDIVPHZrm:  [ 0.00  0.00 ]
+Key: VDIVPHZrmb:  [ 0.00  0.00 ]
+Key: VDIVPHZrmbk:  [ 0.00  0.00 ]
+Key: VDIVPHZrmbkz:  [ 0.00  0.00 ]
+Key: VDIVPHZrmk:  [ 0.00  0.00 ]
+Key: VDIVPHZrmkz:  [ 0.00  0.00 ]
+Key: VDIVPHZrr:  [ 0.00  0.00 ]
+Key: VDIVPHZrrb:  [ 0.00  0.00 ]
+Key: VDIVPHZrrbk:  [ 0.00  0.00 ]
+Key: VDIVPHZrrbkz:  [ 0.00  0.00 ]
+Key: VDIVPHZrrk:  [ 0.00  0.00 ]
+Key: VDIVPHZrrkz:  [ 0.00  0.00 ]
+Key: VDIVPSYrm:  [ 0.00  0.00 ]
+Key: VDIVPSYrr:  [ 0.00  0.00 ]
+Key: VDIVPSZ:  [ 0.00  0.00 ]
+Key: VDIVPSZrm:  [ 0.00  0.00 ]
+Key: VDIVPSZrmb:  [ 0.00  0.00 ]
+Key: VDIVPSZrmbk:  [ 0.00  0.00 ]
+Key: VDIVPSZrmbkz:  [ 0.00  0.00 ]
+Key: VDIVPSZrmk:  [ 0.00  0.00 ]
+Key: VDIVPSZrmkz:  [ 0.00  0.00 ]
+Key: VDIVPSZrr:  [ 0.00  0.00 ]
+Key: VDIVPSZrrb:  [ 0.00  0.00 ]
+Key: VDIVPSZrrbk:  [ 0.00  0.00 ]
+Key: VDIVPSZrrbkz:  [ 0.00  0.00 ]
+Key: VDIVPSZrrk:  [ 0.00  0.00 ]
+Key: VDIVPSZrrkz:  [ 0.00  0.00 ]
+Key: VDIVPSrm:  [ 0.00  0.00 ]
+Key: VDIVPSrr:  [ 0.00  0.00 ]
+Key: VDIVSDZrm:  [ 0.00  0.00 ]
+Key: VDIVSDZrm_Int:  [ 0.00  0.00 ]
+Key: VDIVSDZrmk_Int:  [ 0.00  0.00 ]
+Key: VDIVSDZrmkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSDZrr:  [ 0.00  0.00 ]
+Key: VDIVSDZrr_Int:  [ 0.00  0.00 ]
+Key: VDIVSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VDIVSDZrrbk_Int:  [ 0.00  0.00 ]
+Key: VDIVSDZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSDZrrk_Int:  [ 0.00  0.00 ]
+Key: VDIVSDZrrkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSDrm:  [ 0.00  0.00 ]
+Key: VDIVSDrm_Int:  [ 0.00  0.00 ]
+Key: VDIVSDrr:  [ 0.00  0.00 ]
+Key: VDIVSDrr_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrm:  [ 0.00  0.00 ]
+Key: VDIVSHZrm_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrmk_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrmkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrr:  [ 0.00  0.00 ]
+Key: VDIVSHZrr_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrrbk_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrrk_Int:  [ 0.00  0.00 ]
+Key: VDIVSHZrrkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrm:  [ 0.00  0.00 ]
+Key: VDIVSSZrm_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrmk_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrmkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrr:  [ 0.00  0.00 ]
+Key: VDIVSSZrr_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrrbk_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrrk_Int:  [ 0.00  0.00 ]
+Key: VDIVSSZrrkz_Int:  [ 0.00  0.00 ]
+Key: VDIVSSrm:  [ 0.00  0.00 ]
+Key: VDIVSSrm_Int:  [ 0.00  0.00 ]
+Key: VDIVSSrr:  [ 0.00  0.00 ]
+Key: VDIVSSrr_Int:  [ 0.00  0.00 ]
+Key: VDPBF:  [ 0.00  0.00 ]
+Key: VDPPDrmi:  [ 0.00  0.00 ]
+Key: VDPPDrri:  [ 0.00  0.00 ]
+Key: VDPPHPSZ:  [ 0.00  0.00 ]
+Key: VDPPHPSZm:  [ 0.00  0.00 ]
+Key: VDPPHPSZmb:  [ 0.00  0.00 ]
+Key: VDPPHPSZmbk:  [ 0.00  0.00 ]
+Key: VDPPHPSZmbkz:  [ 0.00  0.00 ]
+Key: VDPPHPSZmk:  [ 0.00  0.00 ]
+Key: VDPPHPSZmkz:  [ 0.00  0.00 ]
+Key: VDPPHPSZr:  [ 0.00  0.00 ]
+Key: VDPPHPSZrk:  [ 0.00  0.00 ]
+Key: VDPPHPSZrkz:  [ 0.00  0.00 ]
+Key: VDPPSYrmi:  [ 0.00  0.00 ]
+Key: VDPPSYrri:  [ 0.00  0.00 ]
+Key: VDPPSrmi:  [ 0.00  0.00 ]
+Key: VDPPSrri:  [ 0.00  0.00 ]
+Key: VERRm:  [ 0.00  0.00 ]
+Key: VERRr:  [ 0.00  0.00 ]
+Key: VERWm:  [ 0.00  0.00 ]
+Key: VERWr:  [ 0.00  0.00 ]
+Key: VEXP:  [ 0.00  0.00 ]
+Key: VEXPANDPDZ:  [ 0.00  0.00 ]
+Key: VEXPANDPDZrm:  [ 0.00  0.00 ]
+Key: VEXPANDPDZrmk:  [ 0.00  0.00 ]
+Key: VEXPANDPDZrmkz:  [ 0.00  0.00 ]
+Key: VEXPANDPDZrr:  [ 0.00  0.00 ]
+Key: VEXPANDPDZrrk:  [ 0.00  0.00 ]
+Key: VEXPANDPDZrrkz:  [ 0.00  0.00 ]
+Key: VEXPANDPSZ:  [ 0.00  0.00 ]
+Key: VEXPANDPSZrm:  [ 0.00  0.00 ]
+Key: VEXPANDPSZrmk:  [ 0.00  0.00 ]
+Key: VEXPANDPSZrmkz:  [ 0.00  0.00 ]
+Key: VEXPANDPSZrr:  [ 0.00  0.00 ]
+Key: VEXPANDPSZrrk:  [ 0.00  0.00 ]
+Key: VEXPANDPSZrrkz:  [ 0.00  0.00 ]
+Key: VEXTRACTF:  [ 0.00  0.00 ]
+Key: VEXTRACTI:  [ 0.00  0.00 ]
+Key: VEXTRACTPSZmri:  [ 0.00  0.00 ]
+Key: VEXTRACTPSZrri:  [ 0.00  0.00 ]
+Key: VEXTRACTPSmri:  [ 0.00  0.00 ]
+Key: VEXTRACTPSrri:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZ:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZm:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZmb:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZmbk:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZmbkz:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZmk:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZmkz:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZr:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZrb:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZrbk:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZrbkz:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZrk:  [ 0.00  0.00 ]
+Key: VFCMADDCPHZrkz:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZm:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZmk:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZmkz:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZr:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZrb:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZrbk:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZrbkz:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZrk:  [ 0.00  0.00 ]
+Key: VFCMADDCSHZrkz:  [ 0.00  0.00 ]
+Key: VFCMULCPHZ:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrm:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrmb:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrmbk:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrmbkz:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrmk:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrmkz:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrr:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrrb:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrrbk:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrrbkz:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrrk:  [ 0.00  0.00 ]
+Key: VFCMULCPHZrrkz:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrm:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrmk:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrmkz:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrr:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrrb:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrrbk:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrrbkz:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrrk:  [ 0.00  0.00 ]
+Key: VFCMULCSHZrrkz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZ:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrmbi:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrmbik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrmbikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrmi:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrmik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrmikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrri:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrrib:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrribk:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrribkz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrrik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPDZrrikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZ:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrmbi:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrmbik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrmbikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrmi:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrmik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrmikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrri:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrrib:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrribk:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrribkz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrrik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMPSZrrikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrmi:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrmik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrmikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrri:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrrib:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrribk:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrribkz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrrik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSDZrrikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrmi:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrmik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrmikz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrri:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrrib:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrribk:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrribkz:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrrik:  [ 0.00  0.00 ]
+Key: VFIXUPIMMSSZrrikz:  [ 0.00  0.00 ]
+Key: VFMADD:  [ 0.00  0.00 ]
+Key: VFMADDCPHZ:  [ 0.00  0.00 ]
+Key: VFMADDCPHZm:  [ 0.00  0.00 ]
+Key: VFMADDCPHZmb:  [ 0.00  0.00 ]
+Key: VFMADDCPHZmbk:  [ 0.00  0.00 ]
+Key: VFMADDCPHZmbkz:  [ 0.00  0.00 ]
+Key: VFMADDCPHZmk:  [ 0.00  0.00 ]
+Key: VFMADDCPHZmkz:  [ 0.00  0.00 ]
+Key: VFMADDCPHZr:  [ 0.00  0.00 ]
+Key: VFMADDCPHZrb:  [ 0.00  0.00 ]
+Key: VFMADDCPHZrbk:  [ 0.00  0.00 ]
+Key: VFMADDCPHZrbkz:  [ 0.00  0.00 ]
+Key: VFMADDCPHZrk:  [ 0.00  0.00 ]
+Key: VFMADDCPHZrkz:  [ 0.00  0.00 ]
+Key: VFMADDCSHZm:  [ 0.00  0.00 ]
+Key: VFMADDCSHZmk:  [ 0.00  0.00 ]
+Key: VFMADDCSHZmkz:  [ 0.00  0.00 ]
+Key: VFMADDCSHZr:  [ 0.00  0.00 ]
+Key: VFMADDCSHZrb:  [ 0.00  0.00 ]
+Key: VFMADDCSHZrbk:  [ 0.00  0.00 ]
+Key: VFMADDCSHZrbkz:  [ 0.00  0.00 ]
+Key: VFMADDCSHZrk:  [ 0.00  0.00 ]
+Key: VFMADDCSHZrkz:  [ 0.00  0.00 ]
+Key: VFMADDPD:  [ 0.00  0.00 ]
+Key: VFMADDPS:  [ 0.00  0.00 ]
+Key: VFMADDSD:  [ 0.00  0.00 ]
+Key: VFMADDSS:  [ 0.00  0.00 ]
+Key: VFMADDSUB:  [ 0.00  0.00 ]
+Key: VFMADDSUBPD:  [ 0.00  0.00 ]
+Key: VFMADDSUBPS:  [ 0.00  0.00 ]
+Key: VFMSUB:  [ 0.00  0.00 ]
+Key: VFMSUBADD:  [ 0.00  0.00 ]
+Key: VFMSUBADDPD:  [ 0.00  0.00 ]
+Key: VFMSUBADDPS:  [ 0.00  0.00 ]
+Key: VFMSUBPD:  [ 0.00  0.00 ]
+Key: VFMSUBPS:  [ 0.00  0.00 ]
+Key: VFMSUBSD:  [ 0.00  0.00 ]
+Key: VFMSUBSS:  [ 0.00  0.00 ]
+Key: VFMULCPHZ:  [ 0.00  0.00 ]
+Key: VFMULCPHZrm:  [ 0.00  0.00 ]
+Key: VFMULCPHZrmb:  [ 0.00  0.00 ]
+Key: VFMULCPHZrmbk:  [ 0.00  0.00 ]
+Key: VFMULCPHZrmbkz:  [ 0.00  0.00 ]
+Key: VFMULCPHZrmk:  [ 0.00  0.00 ]
+Key: VFMULCPHZrmkz:  [ 0.00  0.00 ]
+Key: VFMULCPHZrr:  [ 0.00  0.00 ]
+Key: VFMULCPHZrrb:  [ 0.00  0.00 ]
+Key: VFMULCPHZrrbk:  [ 0.00  0.00 ]
+Key: VFMULCPHZrrbkz:  [ 0.00  0.00 ]
+Key: VFMULCPHZrrk:  [ 0.00  0.00 ]
+Key: VFMULCPHZrrkz:  [ 0.00  0.00 ]
+Key: VFMULCSHZrm:  [ 0.00  0.00 ]
+Key: VFMULCSHZrmk:  [ 0.00  0.00 ]
+Key: VFMULCSHZrmkz:  [ 0.00  0.00 ]
+Key: VFMULCSHZrr:  [ 0.00  0.00 ]
+Key: VFMULCSHZrrb:  [ 0.00  0.00 ]
+Key: VFMULCSHZrrbk:  [ 0.00  0.00 ]
+Key: VFMULCSHZrrbkz:  [ 0.00  0.00 ]
+Key: VFMULCSHZrrk:  [ 0.00  0.00 ]
+Key: VFMULCSHZrrkz:  [ 0.00  0.00 ]
+Key: VFNMADD:  [ 0.00  0.00 ]
+Key: VFNMADDPD:  [ 0.00  0.00 ]
+Key: VFNMADDPS:  [ 0.00  0.00 ]
+Key: VFNMADDSD:  [ 0.00  0.00 ]
+Key: VFNMADDSS:  [ 0.00  0.00 ]
+Key: VFNMSUB:  [ 0.00  0.00 ]
+Key: VFNMSUBPD:  [ 0.00  0.00 ]
+Key: VFNMSUBPS:  [ 0.00  0.00 ]
+Key: VFNMSUBSD:  [ 0.00  0.00 ]
+Key: VFNMSUBSS:  [ 0.00  0.00 ]
+Key: VFPCLASSBF:  [ 0.00  0.00 ]
+Key: VFPCLASSPDZ:  [ 0.00  0.00 ]
+Key: VFPCLASSPDZmbi:  [ 0.00  0.00 ]
+Key: VFPCLASSPDZmbik:  [ 0.00  0.00 ]
+Key: VFPCLASSPDZmi:  [ 0.00  0.00 ]
+Key: VFPCLASSPDZmik:  [ 0.00  0.00 ]
+Key: VFPCLASSPDZri:  [ 0.00  0.00 ]
+Key: VFPCLASSPDZrik:  [ 0.00  0.00 ]
+Key: VFPCLASSPHZ:  [ 0.00  0.00 ]
+Key: VFPCLASSPHZmbi:  [ 0.00  0.00 ]
+Key: VFPCLASSPHZmbik:  [ 0.00  0.00 ]
+Key: VFPCLASSPHZmi:  [ 0.00  0.00 ]
+Key: VFPCLASSPHZmik:  [ 0.00  0.00 ]
+Key: VFPCLASSPHZri:  [ 0.00  0.00 ]
+Key: VFPCLASSPHZrik:  [ 0.00  0.00 ]
+Key: VFPCLASSPSZ:  [ 0.00  0.00 ]
+Key: VFPCLASSPSZmbi:  [ 0.00  0.00 ]
+Key: VFPCLASSPSZmbik:  [ 0.00  0.00 ]
+Key: VFPCLASSPSZmi:  [ 0.00  0.00 ]
+Key: VFPCLASSPSZmik:  [ 0.00  0.00 ]
+Key: VFPCLASSPSZri:  [ 0.00  0.00 ]
+Key: VFPCLASSPSZrik:  [ 0.00  0.00 ]
+Key: VFPCLASSSDZmi:  [ 0.00  0.00 ]
+Key: VFPCLASSSDZmik:  [ 0.00  0.00 ]
+Key: VFPCLASSSDZri:  [ 0.00  0.00 ]
+Key: VFPCLASSSDZrik:  [ 0.00  0.00 ]
+Key: VFPCLASSSHZmi:  [ 0.00  0.00 ]
+Key: VFPCLASSSHZmik:  [ 0.00  0.00 ]
+Key: VFPCLASSSHZri:  [ 0.00  0.00 ]
+Key: VFPCLASSSHZrik:  [ 0.00  0.00 ]
+Key: VFPCLASSSSZmi:  [ 0.00  0.00 ]
+Key: VFPCLASSSSZmik:  [ 0.00  0.00 ]
+Key: VFPCLASSSSZri:  [ 0.00  0.00 ]
+Key: VFPCLASSSSZrik:  [ 0.00  0.00 ]
+Key: VFRCZPDYrm:  [ 0.00  0.00 ]
+Key: VFRCZPDYrr:  [ 0.00  0.00 ]
+Key: VFRCZPDrm:  [ 0.00  0.00 ]
+Key: VFRCZPDrr:  [ 0.00  0.00 ]
+Key: VFRCZPSYrm:  [ 0.00  0.00 ]
+Key: VFRCZPSYrr:  [ 0.00  0.00 ]
+Key: VFRCZPSrm:  [ 0.00  0.00 ]
+Key: VFRCZPSrr:  [ 0.00  0.00 ]
+Key: VFRCZSDrm:  [ 0.00  0.00 ]
+Key: VFRCZSDrr:  [ 0.00  0.00 ]
+Key: VFRCZSSrm:  [ 0.00  0.00 ]
+Key: VFRCZSSrr:  [ 0.00  0.00 ]
+Key: VGATHERDPDYrm:  [ 0.00  0.00 ]
+Key: VGATHERDPDZ:  [ 0.00  0.00 ]
+Key: VGATHERDPDZrm:  [ 0.00  0.00 ]
+Key: VGATHERDPDrm:  [ 0.00  0.00 ]
+Key: VGATHERDPSYrm:  [ 0.00  0.00 ]
+Key: VGATHERDPSZ:  [ 0.00  0.00 ]
+Key: VGATHERDPSZrm:  [ 0.00  0.00 ]
+Key: VGATHERDPSrm:  [ 0.00  0.00 ]
+Key: VGATHERPF:  [ 0.00  0.00 ]
+Key: VGATHERQPDYrm:  [ 0.00  0.00 ]
+Key: VGATHERQPDZ:  [ 0.00  0.00 ]
+Key: VGATHERQPDZrm:  [ 0.00  0.00 ]
+Key: VGATHERQPDrm:  [ 0.00  0.00 ]
+Key: VGATHERQPSYrm:  [ 0.00  0.00 ]
+Key: VGATHERQPSZ:  [ 0.00  0.00 ]
+Key: VGATHERQPSZrm:  [ 0.00  0.00 ]
+Key: VGATHERQPSrm:  [ 0.00  0.00 ]
+Key: VGETEXPBF:  [ 0.00  0.00 ]
+Key: VGETEXPPDZ:  [ 0.00  0.00 ]
+Key: VGETEXPPDZm:  [ 0.00  0.00 ]
+Key: VGETEXPPDZmb:  [ 0.00  0.00 ]
+Key: VGETEXPPDZmbk:  [ 0.00  0.00 ]
+Key: VGETEXPPDZmbkz:  [ 0.00  0.00 ]
+Key: VGETEXPPDZmk:  [ 0.00  0.00 ]
+Key: VGETEXPPDZmkz:  [ 0.00  0.00 ]
+Key: VGETEXPPDZr:  [ 0.00  0.00 ]
+Key: VGETEXPPDZrb:  [ 0.00  0.00 ]
+Key: VGETEXPPDZrbk:  [ 0.00  0.00 ]
+Key: VGETEXPPDZrbkz:  [ 0.00  0.00 ]
+Key: VGETEXPPDZrk:  [ 0.00  0.00 ]
+Key: VGETEXPPDZrkz:  [ 0.00  0.00 ]
+Key: VGETEXPPHZ:  [ 0.00  0.00 ]
+Key: VGETEXPPHZm:  [ 0.00  0.00 ]
+Key: VGETEXPPHZmb:  [ 0.00  0.00 ]
+Key: VGETEXPPHZmbk:  [ 0.00  0.00 ]
+Key: VGETEXPPHZmbkz:  [ 0.00  0.00 ]
+Key: VGETEXPPHZmk:  [ 0.00  0.00 ]
+Key: VGETEXPPHZmkz:  [ 0.00  0.00 ]
+Key: VGETEXPPHZr:  [ 0.00  0.00 ]
+Key: VGETEXPPHZrb:  [ 0.00  0.00 ]
+Key: VGETEXPPHZrbk:  [ 0.00  0.00 ]
+Key: VGETEXPPHZrbkz:  [ 0.00  0.00 ]
+Key: VGETEXPPHZrk:  [ 0.00  0.00 ]
+Key: VGETEXPPHZrkz:  [ 0.00  0.00 ]
+Key: VGETEXPPSZ:  [ 0.00  0.00 ]
+Key: VGETEXPPSZm:  [ 0.00  0.00 ]
+Key: VGETEXPPSZmb:  [ 0.00  0.00 ]
+Key: VGETEXPPSZmbk:  [ 0.00  0.00 ]
+Key: VGETEXPPSZmbkz:  [ 0.00  0.00 ]
+Key: VGETEXPPSZmk:  [ 0.00  0.00 ]
+Key: VGETEXPPSZmkz:  [ 0.00  0.00 ]
+Key: VGETEXPPSZr:  [ 0.00  0.00 ]
+Key: VGETEXPPSZrb:  [ 0.00  0.00 ]
+Key: VGETEXPPSZrbk:  [ 0.00  0.00 ]
+Key: VGETEXPPSZrbkz:  [ 0.00  0.00 ]
+Key: VGETEXPPSZrk:  [ 0.00  0.00 ]
+Key: VGETEXPPSZrkz:  [ 0.00  0.00 ]
+Key: VGETEXPSDZm:  [ 0.00  0.00 ]
+Key: VGETEXPSDZmk:  [ 0.00  0.00 ]
+Key: VGETEXPSDZmkz:  [ 0.00  0.00 ]
+Key: VGETEXPSDZr:  [ 0.00  0.00 ]
+Key: VGETEXPSDZrb:  [ 0.00  0.00 ]
+Key: VGETEXPSDZrbk:  [ 0.00  0.00 ]
+Key: VGETEXPSDZrbkz:  [ 0.00  0.00 ]
+Key: VGETEXPSDZrk:  [ 0.00  0.00 ]
+Key: VGETEXPSDZrkz:  [ 0.00  0.00 ]
+Key: VGETEXPSHZm:  [ 0.00  0.00 ]
+Key: VGETEXPSHZmk:  [ 0.00  0.00 ]
+Key: VGETEXPSHZmkz:  [ 0.00  0.00 ]
+Key: VGETEXPSHZr:  [ 0.00  0.00 ]
+Key: VGETEXPSHZrb:  [ 0.00  0.00 ]
+Key: VGETEXPSHZrbk:  [ 0.00  0.00 ]
+Key: VGETEXPSHZrbkz:  [ 0.00  0.00 ]
+Key: VGETEXPSHZrk:  [ 0.00  0.00 ]
+Key: VGETEXPSHZrkz:  [ 0.00  0.00 ]
+Key: VGETEXPSSZm:  [ 0.00  0.00 ]
+Key: VGETEXPSSZmk:  [ 0.00  0.00 ]
+Key: VGETEXPSSZmkz:  [ 0.00  0.00 ]
+Key: VGETEXPSSZr:  [ 0.00  0.00 ]
+Key: VGETEXPSSZrb:  [ 0.00  0.00 ]
+Key: VGETEXPSSZrbk:  [ 0.00  0.00 ]
+Key: VGETEXPSSZrbkz:  [ 0.00  0.00 ]
+Key: VGETEXPSSZrk:  [ 0.00  0.00 ]
+Key: VGETEXPSSZrkz:  [ 0.00  0.00 ]
+Key: VGETMANTBF:  [ 0.00  0.00 ]
+Key: VGETMANTPDZ:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrmbi:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrmbik:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrmbikz:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrmi:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrmik:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrmikz:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrri:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrrib:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrribk:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrribkz:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrrik:  [ 0.00  0.00 ]
+Key: VGETMANTPDZrrikz:  [ 0.00  0.00 ]
+Key: VGETMANTPHZ:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrmbi:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrmbik:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrmbikz:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrmi:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrmik:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrmikz:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrri:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrrib:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrribk:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrribkz:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrrik:  [ 0.00  0.00 ]
+Key: VGETMANTPHZrrikz:  [ 0.00  0.00 ]
+Key: VGETMANTPSZ:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrmbi:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrmbik:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrmbikz:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrmi:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrmik:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrmikz:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrri:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrrib:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrribk:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrribkz:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrrik:  [ 0.00  0.00 ]
+Key: VGETMANTPSZrrikz:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrmi:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrmik:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrmikz:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrri:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrrib:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrribk:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrribkz:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrrik:  [ 0.00  0.00 ]
+Key: VGETMANTSDZrrikz:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrmi:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrmik:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrmikz:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrri:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrrib:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrribk:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrribkz:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrrik:  [ 0.00  0.00 ]
+Key: VGETMANTSHZrrikz:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrmi:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrmik:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrmikz:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrri:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrrib:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrribk:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrribkz:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrrik:  [ 0.00  0.00 ]
+Key: VGETMANTSSZrrikz:  [ 0.00  0.00 ]
+Key: VGF:  [ 0.00  0.00 ]
+Key: VHADDPDYrm:  [ 0.00  0.00 ]
+Key: VHADDPDYrr:  [ 0.00  0.00 ]
+Key: VHADDPDrm:  [ 0.00  0.00 ]
+Key: VHADDPDrr:  [ 0.00  0.00 ]
+Key: VHADDPSYrm:  [ 0.00  0.00 ]
+Key: VHADDPSYrr:  [ 0.00  0.00 ]
+Key: VHADDPSrm:  [ 0.00  0.00 ]
+Key: VHADDPSrr:  [ 0.00  0.00 ]
+Key: VHSUBPDYrm:  [ 0.00  0.00 ]
+Key: VHSUBPDYrr:  [ 0.00  0.00 ]
+Key: VHSUBPDrm:  [ 0.00  0.00 ]
+Key: VHSUBPDrr:  [ 0.00  0.00 ]
+Key: VHSUBPSYrm:  [ 0.00  0.00 ]
+Key: VHSUBPSYrr:  [ 0.00  0.00 ]
+Key: VHSUBPSrm:  [ 0.00  0.00 ]
+Key: VHSUBPSrr:  [ 0.00  0.00 ]
+Key: VINSERTF:  [ 0.00  0.00 ]
+Key: VINSERTI:  [ 0.00  0.00 ]
+Key: VINSERTPSZrmi:  [ 0.00  0.00 ]
+Key: VINSERTPSZrri:  [ 0.00  0.00 ]
+Key: VINSERTPSrmi:  [ 0.00  0.00 ]
+Key: VINSERTPSrri:  [ 0.00  0.00 ]
+Key: VLDDQUYrm:  [ 0.00  0.00 ]
+Key: VLDDQUrm:  [ 0.00  0.00 ]
+Key: VLDMXCSR:  [ 0.00  0.00 ]
+Key: VMASKMOVDQU:  [ 0.00  0.00 ]
+Key: VMASKMOVPDYmr:  [ 0.00  0.00 ]
+Key: VMASKMOVPDYrm:  [ 0.00  0.00 ]
+Key: VMASKMOVPDmr:  [ 0.00  0.00 ]
+Key: VMASKMOVPDrm:  [ 0.00  0.00 ]
+Key: VMASKMOVPSYmr:  [ 0.00  0.00 ]
+Key: VMASKMOVPSYrm:  [ 0.00  0.00 ]
+Key: VMASKMOVPSmr:  [ 0.00  0.00 ]
+Key: VMASKMOVPSrm:  [ 0.00  0.00 ]
+Key: VMAXBF:  [ 0.00  0.00 ]
+Key: VMAXCPDYrm:  [ 0.00  0.00 ]
+Key: VMAXCPDYrr:  [ 0.00  0.00 ]
+Key: VMAXCPDZ:  [ 0.00  0.00 ]
+Key: VMAXCPDZrm:  [ 0.00  0.00 ]
+Key: VMAXCPDZrmb:  [ 0.00  0.00 ]
+Key: VMAXCPDZrmbk:  [ 0.00  0.00 ]
+Key: VMAXCPDZrmbkz:  [ 0.00  0.00 ]
+Key: VMAXCPDZrmk:  [ 0.00  0.00 ]
+Key: VMAXCPDZrmkz:  [ 0.00  0.00 ]
+Key: VMAXCPDZrr:  [ 0.00  0.00 ]
+Key: VMAXCPDZrrk:  [ 0.00  0.00 ]
+Key: VMAXCPDZrrkz:  [ 0.00  0.00 ]
+Key: VMAXCPDrm:  [ 0.00  0.00 ]
+Key: VMAXCPDrr:  [ 0.00  0.00 ]
+Key: VMAXCPHZ:  [ 0.00  0.00 ]
+Key: VMAXCPHZrm:  [ 0.00  0.00 ]
+Key: VMAXCPHZrmb:  [ 0.00  0.00 ]
+Key: VMAXCPHZrmbk:  [ 0.00  0.00 ]
+Key: VMAXCPHZrmbkz:  [ 0.00  0.00 ]
+Key: VMAXCPHZrmk:  [ 0.00  0.00 ]
+Key: VMAXCPHZrmkz:  [ 0.00  0.00 ]
+Key: VMAXCPHZrr:  [ 0.00  0.00 ]
+Key: VMAXCPHZrrk:  [ 0.00  0.00 ]
+Key: VMAXCPHZrrkz:  [ 0.00  0.00 ]
+Key: VMAXCPSYrm:  [ 0.00  0.00 ]
+Key: VMAXCPSYrr:  [ 0.00  0.00 ]
+Key: VMAXCPSZ:  [ 0.00  0.00 ]
+Key: VMAXCPSZrm:  [ 0.00  0.00 ]
+Key: VMAXCPSZrmb:  [ 0.00  0.00 ]
+Key: VMAXCPSZrmbk:  [ 0.00  0.00 ]
+Key: VMAXCPSZrmbkz:  [ 0.00  0.00 ]
+Key: VMAXCPSZrmk:  [ 0.00  0.00 ]
+Key: VMAXCPSZrmkz:  [ 0.00  0.00 ]
+Key: VMAXCPSZrr:  [ 0.00  0.00 ]
+Key: VMAXCPSZrrk:  [ 0.00  0.00 ]
+Key: VMAXCPSZrrkz:  [ 0.00  0.00 ]
+Key: VMAXCPSrm:  [ 0.00  0.00 ]
+Key: VMAXCPSrr:  [ 0.00  0.00 ]
+Key: VMAXCSDZrm:  [ 0.00  0.00 ]
+Key: VMAXCSDZrr:  [ 0.00  0.00 ]
+Key: VMAXCSDrm:  [ 0.00  0.00 ]
+Key: VMAXCSDrr:  [ 0.00  0.00 ]
+Key: VMAXCSHZrm:  [ 0.00  0.00 ]
+Key: VMAXCSHZrr:  [ 0.00  0.00 ]
+Key: VMAXCSSZrm:  [ 0.00  0.00 ]
+Key: VMAXCSSZrr:  [ 0.00  0.00 ]
+Key: VMAXCSSrm:  [ 0.00  0.00 ]
+Key: VMAXCSSrr:  [ 0.00  0.00 ]
+Key: VMAXPDYrm:  [ 0.00  0.00 ]
+Key: VMAXPDYrr:  [ 0.00  0.00 ]
+Key: VMAXPDZ:  [ 0.00  0.00 ]
+Key: VMAXPDZrm:  [ 0.00  0.00 ]
+Key: VMAXPDZrmb:  [ 0.00  0.00 ]
+Key: VMAXPDZrmbk:  [ 0.00  0.00 ]
+Key: VMAXPDZrmbkz:  [ 0.00  0.00 ]
+Key: VMAXPDZrmk:  [ 0.00  0.00 ]
+Key: VMAXPDZrmkz:  [ 0.00  0.00 ]
+Key: VMAXPDZrr:  [ 0.00  0.00 ]
+Key: VMAXPDZrrb:  [ 0.00  0.00 ]
+Key: VMAXPDZrrbk:  [ 0.00  0.00 ]
+Key: VMAXPDZrrbkz:  [ 0.00  0.00 ]
+Key: VMAXPDZrrk:  [ 0.00  0.00 ]
+Key: VMAXPDZrrkz:  [ 0.00  0.00 ]
+Key: VMAXPDrm:  [ 0.00  0.00 ]
+Key: VMAXPDrr:  [ 0.00  0.00 ]
+Key: VMAXPHZ:  [ 0.00  0.00 ]
+Key: VMAXPHZrm:  [ 0.00  0.00 ]
+Key: VMAXPHZrmb:  [ 0.00  0.00 ]
+Key: VMAXPHZrmbk:  [ 0.00  0.00 ]
+Key: VMAXPHZrmbkz:  [ 0.00  0.00 ]
+Key: VMAXPHZrmk:  [ 0.00  0.00 ]
+Key: VMAXPHZrmkz:  [ 0.00  0.00 ]
+Key: VMAXPHZrr:  [ 0.00  0.00 ]
+Key: VMAXPHZrrb:  [ 0.00  0.00 ]
+Key: VMAXPHZrrbk:  [ 0.00  0.00 ]
+Key: VMAXPHZrrbkz:  [ 0.00  0.00 ]
+Key: VMAXPHZrrk:  [ 0.00  0.00 ]
+Key: VMAXPHZrrkz:  [ 0.00  0.00 ]
+Key: VMAXPSYrm:  [ 0.00  0.00 ]
+Key: VMAXPSYrr:  [ 0.00  0.00 ]
+Key: VMAXPSZ:  [ 0.00  0.00 ]
+Key: VMAXPSZrm:  [ 0.00  0.00 ]
+Key: VMAXPSZrmb:  [ 0.00  0.00 ]
+Key: VMAXPSZrmbk:  [ 0.00  0.00 ]
+Key: VMAXPSZrmbkz:  [ 0.00  0.00 ]
+Key: VMAXPSZrmk:  [ 0.00  0.00 ]
+Key: VMAXPSZrmkz:  [ 0.00  0.00 ]
+Key: VMAXPSZrr:  [ 0.00  0.00 ]
+Key: VMAXPSZrrb:  [ 0.00  0.00 ]
+Key: VMAXPSZrrbk:  [ 0.00  0.00 ]
+Key: VMAXPSZrrbkz:  [ 0.00  0.00 ]
+Key: VMAXPSZrrk:  [ 0.00  0.00 ]
+Key: VMAXPSZrrkz:  [ 0.00  0.00 ]
+Key: VMAXPSrm:  [ 0.00  0.00 ]
+Key: VMAXPSrr:  [ 0.00  0.00 ]
+Key: VMAXSDZrm:  [ 0.00  0.00 ]
+Key: VMAXSDZrm_Int:  [ 0.00  0.00 ]
+Key: VMAXSDZrmk_Int:  [ 0.00  0.00 ]
+Key: VMAXSDZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSDZrr:  [ 0.00  0.00 ]
+Key: VMAXSDZrr_Int:  [ 0.00  0.00 ]
+Key: VMAXSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VMAXSDZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMAXSDZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSDZrrk_Int:  [ 0.00  0.00 ]
+Key: VMAXSDZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSDrm:  [ 0.00  0.00 ]
+Key: VMAXSDrm_Int:  [ 0.00  0.00 ]
+Key: VMAXSDrr:  [ 0.00  0.00 ]
+Key: VMAXSDrr_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrm:  [ 0.00  0.00 ]
+Key: VMAXSHZrm_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrmk_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrr:  [ 0.00  0.00 ]
+Key: VMAXSHZrr_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrrk_Int:  [ 0.00  0.00 ]
+Key: VMAXSHZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrm:  [ 0.00  0.00 ]
+Key: VMAXSSZrm_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrmk_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrr:  [ 0.00  0.00 ]
+Key: VMAXSSZrr_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrrk_Int:  [ 0.00  0.00 ]
+Key: VMAXSSZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMAXSSrm:  [ 0.00  0.00 ]
+Key: VMAXSSrm_Int:  [ 0.00  0.00 ]
+Key: VMAXSSrr:  [ 0.00  0.00 ]
+Key: VMAXSSrr_Int:  [ 0.00  0.00 ]
+Key: VMCALL:  [ 0.00  0.00 ]
+Key: VMCLEARm:  [ 0.00  0.00 ]
+Key: VMFUNC:  [ 0.00  0.00 ]
+Key: VMINBF:  [ 0.00  0.00 ]
+Key: VMINCPDYrm:  [ 0.00  0.00 ]
+Key: VMINCPDYrr:  [ 0.00  0.00 ]
+Key: VMINCPDZ:  [ 0.00  0.00 ]
+Key: VMINCPDZrm:  [ 0.00  0.00 ]
+Key: VMINCPDZrmb:  [ 0.00  0.00 ]
+Key: VMINCPDZrmbk:  [ 0.00  0.00 ]
+Key: VMINCPDZrmbkz:  [ 0.00  0.00 ]
+Key: VMINCPDZrmk:  [ 0.00  0.00 ]
+Key: VMINCPDZrmkz:  [ 0.00  0.00 ]
+Key: VMINCPDZrr:  [ 0.00  0.00 ]
+Key: VMINCPDZrrk:  [ 0.00  0.00 ]
+Key: VMINCPDZrrkz:  [ 0.00  0.00 ]
+Key: VMINCPDrm:  [ 0.00  0.00 ]
+Key: VMINCPDrr:  [ 0.00  0.00 ]
+Key: VMINCPHZ:  [ 0.00  0.00 ]
+Key: VMINCPHZrm:  [ 0.00  0.00 ]
+Key: VMINCPHZrmb:  [ 0.00  0.00 ]
+Key: VMINCPHZrmbk:  [ 0.00  0.00 ]
+Key: VMINCPHZrmbkz:  [ 0.00  0.00 ]
+Key: VMINCPHZrmk:  [ 0.00  0.00 ]
+Key: VMINCPHZrmkz:  [ 0.00  0.00 ]
+Key: VMINCPHZrr:  [ 0.00  0.00 ]
+Key: VMINCPHZrrk:  [ 0.00  0.00 ]
+Key: VMINCPHZrrkz:  [ 0.00  0.00 ]
+Key: VMINCPSYrm:  [ 0.00  0.00 ]
+Key: VMINCPSYrr:  [ 0.00  0.00 ]
+Key: VMINCPSZ:  [ 0.00  0.00 ]
+Key: VMINCPSZrm:  [ 0.00  0.00 ]
+Key: VMINCPSZrmb:  [ 0.00  0.00 ]
+Key: VMINCPSZrmbk:  [ 0.00  0.00 ]
+Key: VMINCPSZrmbkz:  [ 0.00  0.00 ]
+Key: VMINCPSZrmk:  [ 0.00  0.00 ]
+Key: VMINCPSZrmkz:  [ 0.00  0.00 ]
+Key: VMINCPSZrr:  [ 0.00  0.00 ]
+Key: VMINCPSZrrk:  [ 0.00  0.00 ]
+Key: VMINCPSZrrkz:  [ 0.00  0.00 ]
+Key: VMINCPSrm:  [ 0.00  0.00 ]
+Key: VMINCPSrr:  [ 0.00  0.00 ]
+Key: VMINCSDZrm:  [ 0.00  0.00 ]
+Key: VMINCSDZrr:  [ 0.00  0.00 ]
+Key: VMINCSDrm:  [ 0.00  0.00 ]
+Key: VMINCSDrr:  [ 0.00  0.00 ]
+Key: VMINCSHZrm:  [ 0.00  0.00 ]
+Key: VMINCSHZrr:  [ 0.00  0.00 ]
+Key: VMINCSSZrm:  [ 0.00  0.00 ]
+Key: VMINCSSZrr:  [ 0.00  0.00 ]
+Key: VMINCSSrm:  [ 0.00  0.00 ]
+Key: VMINCSSrr:  [ 0.00  0.00 ]
+Key: VMINMAXBF:  [ 0.00  0.00 ]
+Key: VMINMAXPDZ:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrmbi:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrmbik:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrmbikz:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrmi:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrmik:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrmikz:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrri:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrrib:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrribk:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrribkz:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrrik:  [ 0.00  0.00 ]
+Key: VMINMAXPDZrrikz:  [ 0.00  0.00 ]
+Key: VMINMAXPHZ:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrmbi:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrmbik:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrmbikz:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrmi:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrmik:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrmikz:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrri:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrrib:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrribk:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrribkz:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrrik:  [ 0.00  0.00 ]
+Key: VMINMAXPHZrrikz:  [ 0.00  0.00 ]
+Key: VMINMAXPSZ:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrmbi:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrmbik:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrmbikz:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrmi:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrmik:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrmikz:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrri:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrrib:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrribk:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrribkz:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrrik:  [ 0.00  0.00 ]
+Key: VMINMAXPSZrrikz:  [ 0.00  0.00 ]
+Key: VMINMAXSDrmi:  [ 0.00  0.00 ]
+Key: VMINMAXSDrmi_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSDrmik_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSDrmikz_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSDrri:  [ 0.00  0.00 ]
+Key: VMINMAXSDrri_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSDrrib_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSDrribk_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSDrribkz_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSDrrik_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSDrrikz_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrmi:  [ 0.00  0.00 ]
+Key: VMINMAXSHrmi_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrmik_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrmikz_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrri:  [ 0.00  0.00 ]
+Key: VMINMAXSHrri_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrrib_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrribk_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrribkz_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrrik_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSHrrikz_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrmi:  [ 0.00  0.00 ]
+Key: VMINMAXSSrmi_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrmik_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrmikz_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrri:  [ 0.00  0.00 ]
+Key: VMINMAXSSrri_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrrib_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrribk_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrribkz_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrrik_Int:  [ 0.00  0.00 ]
+Key: VMINMAXSSrrikz_Int:  [ 0.00  0.00 ]
+Key: VMINPDYrm:  [ 0.00  0.00 ]
+Key: VMINPDYrr:  [ 0.00  0.00 ]
+Key: VMINPDZ:  [ 0.00  0.00 ]
+Key: VMINPDZrm:  [ 0.00  0.00 ]
+Key: VMINPDZrmb:  [ 0.00  0.00 ]
+Key: VMINPDZrmbk:  [ 0.00  0.00 ]
+Key: VMINPDZrmbkz:  [ 0.00  0.00 ]
+Key: VMINPDZrmk:  [ 0.00  0.00 ]
+Key: VMINPDZrmkz:  [ 0.00  0.00 ]
+Key: VMINPDZrr:  [ 0.00  0.00 ]
+Key: VMINPDZrrb:  [ 0.00  0.00 ]
+Key: VMINPDZrrbk:  [ 0.00  0.00 ]
+Key: VMINPDZrrbkz:  [ 0.00  0.00 ]
+Key: VMINPDZrrk:  [ 0.00  0.00 ]
+Key: VMINPDZrrkz:  [ 0.00  0.00 ]
+Key: VMINPDrm:  [ 0.00  0.00 ]
+Key: VMINPDrr:  [ 0.00  0.00 ]
+Key: VMINPHZ:  [ 0.00  0.00 ]
+Key: VMINPHZrm:  [ 0.00  0.00 ]
+Key: VMINPHZrmb:  [ 0.00  0.00 ]
+Key: VMINPHZrmbk:  [ 0.00  0.00 ]
+Key: VMINPHZrmbkz:  [ 0.00  0.00 ]
+Key: VMINPHZrmk:  [ 0.00  0.00 ]
+Key: VMINPHZrmkz:  [ 0.00  0.00 ]
+Key: VMINPHZrr:  [ 0.00  0.00 ]
+Key: VMINPHZrrb:  [ 0.00  0.00 ]
+Key: VMINPHZrrbk:  [ 0.00  0.00 ]
+Key: VMINPHZrrbkz:  [ 0.00  0.00 ]
+Key: VMINPHZrrk:  [ 0.00  0.00 ]
+Key: VMINPHZrrkz:  [ 0.00  0.00 ]
+Key: VMINPSYrm:  [ 0.00  0.00 ]
+Key: VMINPSYrr:  [ 0.00  0.00 ]
+Key: VMINPSZ:  [ 0.00  0.00 ]
+Key: VMINPSZrm:  [ 0.00  0.00 ]
+Key: VMINPSZrmb:  [ 0.00  0.00 ]
+Key: VMINPSZrmbk:  [ 0.00  0.00 ]
+Key: VMINPSZrmbkz:  [ 0.00  0.00 ]
+Key: VMINPSZrmk:  [ 0.00  0.00 ]
+Key: VMINPSZrmkz:  [ 0.00  0.00 ]
+Key: VMINPSZrr:  [ 0.00  0.00 ]
+Key: VMINPSZrrb:  [ 0.00  0.00 ]
+Key: VMINPSZrrbk:  [ 0.00  0.00 ]
+Key: VMINPSZrrbkz:  [ 0.00  0.00 ]
+Key: VMINPSZrrk:  [ 0.00  0.00 ]
+Key: VMINPSZrrkz:  [ 0.00  0.00 ]
+Key: VMINPSrm:  [ 0.00  0.00 ]
+Key: VMINPSrr:  [ 0.00  0.00 ]
+Key: VMINSDZrm:  [ 0.00  0.00 ]
+Key: VMINSDZrm_Int:  [ 0.00  0.00 ]
+Key: VMINSDZrmk_Int:  [ 0.00  0.00 ]
+Key: VMINSDZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMINSDZrr:  [ 0.00  0.00 ]
+Key: VMINSDZrr_Int:  [ 0.00  0.00 ]
+Key: VMINSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VMINSDZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMINSDZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMINSDZrrk_Int:  [ 0.00  0.00 ]
+Key: VMINSDZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMINSDrm:  [ 0.00  0.00 ]
+Key: VMINSDrm_Int:  [ 0.00  0.00 ]
+Key: VMINSDrr:  [ 0.00  0.00 ]
+Key: VMINSDrr_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrm:  [ 0.00  0.00 ]
+Key: VMINSHZrm_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrmk_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrr:  [ 0.00  0.00 ]
+Key: VMINSHZrr_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrrk_Int:  [ 0.00  0.00 ]
+Key: VMINSHZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrm:  [ 0.00  0.00 ]
+Key: VMINSSZrm_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrmk_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrr:  [ 0.00  0.00 ]
+Key: VMINSSZrr_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrrk_Int:  [ 0.00  0.00 ]
+Key: VMINSSZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMINSSrm:  [ 0.00  0.00 ]
+Key: VMINSSrm_Int:  [ 0.00  0.00 ]
+Key: VMINSSrr:  [ 0.00  0.00 ]
+Key: VMINSSrr_Int:  [ 0.00  0.00 ]
+Key: VMLAUNCH:  [ 0.00  0.00 ]
+Key: VMLOAD:  [ 0.00  0.00 ]
+Key: VMMCALL:  [ 0.00  0.00 ]
+Key: VMOV:  [ 0.00  0.00 ]
+Key: VMOVAPDYmr:  [ 0.00  0.00 ]
+Key: VMOVAPDYrm:  [ 0.00  0.00 ]
+Key: VMOVAPDYrr:  [ 0.00  0.00 ]
+Key: VMOVAPDYrr_REV:  [ 0.00  0.00 ]
+Key: VMOVAPDZ:  [ 0.00  0.00 ]
+Key: VMOVAPDZmr:  [ 0.00  0.00 ]
+Key: VMOVAPDZmrk:  [ 0.00  0.00 ]
+Key: VMOVAPDZrm:  [ 0.00  0.00 ]
+Key: VMOVAPDZrmk:  [ 0.00  0.00 ]
+Key: VMOVAPDZrmkz:  [ 0.00  0.00 ]
+Key: VMOVAPDZrr:  [ 0.00  0.00 ]
+Key: VMOVAPDZrr_REV:  [ 0.00  0.00 ]
+Key: VMOVAPDZrrk:  [ 0.00  0.00 ]
+Key: VMOVAPDZrrk_REV:  [ 0.00  0.00 ]
+Key: VMOVAPDZrrkz:  [ 0.00  0.00 ]
+Key: VMOVAPDZrrkz_REV:  [ 0.00  0.00 ]
+Key: VMOVAPDmr:  [ 0.00  0.00 ]
+Key: VMOVAPDrm:  [ 0.00  0.00 ]
+Key: VMOVAPDrr:  [ 0.00  0.00 ]
+Key: VMOVAPDrr_REV:  [ 0.00  0.00 ]
+Key: VMOVAPSYmr:  [ 0.00  0.00 ]
+Key: VMOVAPSYrm:  [ 0.00  0.00 ]
+Key: VMOVAPSYrr:  [ 0.00  0.00 ]
+Key: VMOVAPSYrr_REV:  [ 0.00  0.00 ]
+Key: VMOVAPSZ:  [ 0.00  0.00 ]
+Key: VMOVAPSZmr:  [ 0.00  0.00 ]
+Key: VMOVAPSZmrk:  [ 0.00  0.00 ]
+Key: VMOVAPSZrm:  [ 0.00  0.00 ]
+Key: VMOVAPSZrmk:  [ 0.00  0.00 ]
+Key: VMOVAPSZrmkz:  [ 0.00  0.00 ]
+Key: VMOVAPSZrr:  [ 0.00  0.00 ]
+Key: VMOVAPSZrr_REV:  [ 0.00  0.00 ]
+Key: VMOVAPSZrrk:  [ 0.00  0.00 ]
+Key: VMOVAPSZrrk_REV:  [ 0.00  0.00 ]
+Key: VMOVAPSZrrkz:  [ 0.00  0.00 ]
+Key: VMOVAPSZrrkz_REV:  [ 0.00  0.00 ]
+Key: VMOVAPSmr:  [ 0.00  0.00 ]
+Key: VMOVAPSrm:  [ 0.00  0.00 ]
+Key: VMOVAPSrr:  [ 0.00  0.00 ]
+Key: VMOVAPSrr_REV:  [ 0.00  0.00 ]
+Key: VMOVDDUPYrm:  [ 0.00  0.00 ]
+Key: VMOVDDUPYrr:  [ 0.00  0.00 ]
+Key: VMOVDDUPZ:  [ 0.00  0.00 ]
+Key: VMOVDDUPZrm:  [ 0.00  0.00 ]
+Key: VMOVDDUPZrmk:  [ 0.00  0.00 ]
+Key: VMOVDDUPZrmkz:  [ 0.00  0.00 ]
+Key: VMOVDDUPZrr:  [ 0.00  0.00 ]
+Key: VMOVDDUPZrrk:  [ 0.00  0.00 ]
+Key: VMOVDDUPZrrkz:  [ 0.00  0.00 ]
+Key: VMOVDDUPrm:  [ 0.00  0.00 ]
+Key: VMOVDDUPrr:  [ 0.00  0.00 ]
+Key: VMOVDI:  [ 0.00  0.00 ]
+Key: VMOVDQA:  [ 0.00  0.00 ]
+Key: VMOVDQAYmr:  [ 0.00  0.00 ]
+Key: VMOVDQAYrm:  [ 0.00  0.00 ]
+Key: VMOVDQAYrr:  [ 0.00  0.00 ]
+Key: VMOVDQAYrr_REV:  [ 0.00  0.00 ]
+Key: VMOVDQAmr:  [ 0.00  0.00 ]
+Key: VMOVDQArm:  [ 0.00  0.00 ]
+Key: VMOVDQArr:  [ 0.00  0.00 ]
+Key: VMOVDQArr_REV:  [ 0.00  0.00 ]
+Key: VMOVDQU:  [ 0.00  0.00 ]
+Key: VMOVDQUYmr:  [ 0.00  0.00 ]
+Key: VMOVDQUYrm:  [ 0.00  0.00 ]
+Key: VMOVDQUYrr:  [ 0.00  0.00 ]
+Key: VMOVDQUYrr_REV:  [ 0.00  0.00 ]
+Key: VMOVDQUmr:  [ 0.00  0.00 ]
+Key: VMOVDQUrm:  [ 0.00  0.00 ]
+Key: VMOVDQUrr:  [ 0.00  0.00 ]
+Key: VMOVDQUrr_REV:  [ 0.00  0.00 ]
+Key: VMOVHLPSZrr:  [ 0.00  0.00 ]
+Key: VMOVHLPSrr:  [ 0.00  0.00 ]
+Key: VMOVHPDZ:  [ 0.00  0.00 ]
+Key: VMOVHPDmr:  [ 0.00  0.00 ]
+Key: VMOVHPDrm:  [ 0.00  0.00 ]
+Key: VMOVHPSZ:  [ 0.00  0.00 ]
+Key: VMOVHPSmr:  [ 0.00  0.00 ]
+Key: VMOVHPSrm:  [ 0.00  0.00 ]
+Key: VMOVLHPSZrr:  [ 0.00  0.00 ]
+Key: VMOVLHPSrr:  [ 0.00  0.00 ]
+Key: VMOVLPDZ:  [ 0.00  0.00 ]
+Key: VMOVLPDmr:  [ 0.00  0.00 ]
+Key: VMOVLPDrm:  [ 0.00  0.00 ]
+Key: VMOVLPSZ:  [ 0.00  0.00 ]
+Key: VMOVLPSmr:  [ 0.00  0.00 ]
+Key: VMOVLPSrm:  [ 0.00  0.00 ]
+Key: VMOVMSKPDYrr:  [ 0.00  0.00 ]
+Key: VMOVMSKPDrr:  [ 0.00  0.00 ]
+Key: VMOVMSKPSYrr:  [ 0.00  0.00 ]
+Key: VMOVMSKPSrr:  [ 0.00  0.00 ]
+Key: VMOVNTDQAYrm:  [ 0.00  0.00 ]
+Key: VMOVNTDQAZ:  [ 0.00  0.00 ]
+Key: VMOVNTDQAZrm:  [ 0.00  0.00 ]
+Key: VMOVNTDQArm:  [ 0.00  0.00 ]
+Key: VMOVNTDQYmr:  [ 0.00  0.00 ]
+Key: VMOVNTDQZ:  [ 0.00  0.00 ]
+Key: VMOVNTDQZmr:  [ 0.00  0.00 ]
+Key: VMOVNTDQmr:  [ 0.00  0.00 ]
+Key: VMOVNTPDYmr:  [ 0.00  0.00 ]
+Key: VMOVNTPDZ:  [ 0.00  0.00 ]
+Key: VMOVNTPDZmr:  [ 0.00  0.00 ]
+Key: VMOVNTPDmr:  [ 0.00  0.00 ]
+Key: VMOVNTPSYmr:  [ 0.00  0.00 ]
+Key: VMOVNTPSZ:  [ 0.00  0.00 ]
+Key: VMOVNTPSZmr:  [ 0.00  0.00 ]
+Key: VMOVNTPSmr:  [ 0.00  0.00 ]
+Key: VMOVPDI:  [ 0.00  0.00 ]
+Key: VMOVPQI:  [ 0.00  0.00 ]
+Key: VMOVPQIto:  [ 0.00  0.00 ]
+Key: VMOVQI:  [ 0.00  0.00 ]
+Key: VMOVRSBZ:  [ 0.00  0.00 ]
+Key: VMOVRSBZm:  [ 0.00  0.00 ]
+Key: VMOVRSBZmk:  [ 0.00  0.00 ]
+Key: VMOVRSBZmkz:  [ 0.00  0.00 ]
+Key: VMOVRSDZ:  [ 0.00  0.00 ]
+Key: VMOVRSDZm:  [ 0.00  0.00 ]
+Key: VMOVRSDZmk:  [ 0.00  0.00 ]
+Key: VMOVRSDZmkz:  [ 0.00  0.00 ]
+Key: VMOVRSQZ:  [ 0.00  0.00 ]
+Key: VMOVRSQZm:  [ 0.00  0.00 ]
+Key: VMOVRSQZmk:  [ 0.00  0.00 ]
+Key: VMOVRSQZmkz:  [ 0.00  0.00 ]
+Key: VMOVRSWZ:  [ 0.00  0.00 ]
+Key: VMOVRSWZm:  [ 0.00  0.00 ]
+Key: VMOVRSWZmk:  [ 0.00  0.00 ]
+Key: VMOVRSWZmkz:  [ 0.00  0.00 ]
+Key: VMOVSDZmr:  [ 0.00  0.00 ]
+Key: VMOVSDZmrk:  [ 0.00  0.00 ]
+Key: VMOVSDZrm:  [ 0.00  0.00 ]
+Key: VMOVSDZrm_alt:  [ 0.00  0.00 ]
+Key: VMOVSDZrmk:  [ 0.00  0.00 ]
+Key: VMOVSDZrmkz:  [ 0.00  0.00 ]
+Key: VMOVSDZrr:  [ 0.00  0.00 ]
+Key: VMOVSDZrr_REV:  [ 0.00  0.00 ]
+Key: VMOVSDZrrk:  [ 0.00  0.00 ]
+Key: VMOVSDZrrk_REV:  [ 0.00  0.00 ]
+Key: VMOVSDZrrkz:  [ 0.00  0.00 ]
+Key: VMOVSDZrrkz_REV:  [ 0.00  0.00 ]
+Key: VMOVSDmr:  [ 0.00  0.00 ]
+Key: VMOVSDrm:  [ 0.00  0.00 ]
+Key: VMOVSDrm_alt:  [ 0.00  0.00 ]
+Key: VMOVSDrr:  [ 0.00  0.00 ]
+Key: VMOVSDrr_REV:  [ 0.00  0.00 ]
+Key: VMOVSDto:  [ 0.00  0.00 ]
+Key: VMOVSH:  [ 0.00  0.00 ]
+Key: VMOVSHDUPYrm:  [ 0.00  0.00 ]
+Key: VMOVSHDUPYrr:  [ 0.00  0.00 ]
+Key: VMOVSHDUPZ:  [ 0.00  0.00 ]
+Key: VMOVSHDUPZrm:  [ 0.00  0.00 ]
+Key: VMOVSHDUPZrmk:  [ 0.00  0.00 ]
+Key: VMOVSHDUPZrmkz:  [ 0.00  0.00 ]
+Key: VMOVSHDUPZrr:  [ 0.00  0.00 ]
+Key: VMOVSHDUPZrrk:  [ 0.00  0.00 ]
+Key: VMOVSHDUPZrrkz:  [ 0.00  0.00 ]
+Key: VMOVSHDUPrm:  [ 0.00  0.00 ]
+Key: VMOVSHDUPrr:  [ 0.00  0.00 ]
+Key: VMOVSHZmr:  [ 0.00  0.00 ]
+Key: VMOVSHZmrk:  [ 0.00  0.00 ]
+Key: VMOVSHZrm:  [ 0.00  0.00 ]
+Key: VMOVSHZrm_alt:  [ 0.00  0.00 ]
+Key: VMOVSHZrmk:  [ 0.00  0.00 ]
+Key: VMOVSHZrmkz:  [ 0.00  0.00 ]
+Key: VMOVSHZrr:  [ 0.00  0.00 ]
+Key: VMOVSHZrr_REV:  [ 0.00  0.00 ]
+Key: VMOVSHZrrk:  [ 0.00  0.00 ]
+Key: VMOVSHZrrk_REV:  [ 0.00  0.00 ]
+Key: VMOVSHZrrkz:  [ 0.00  0.00 ]
+Key: VMOVSHZrrkz_REV:  [ 0.00  0.00 ]
+Key: VMOVSHtoW:  [ 0.00  0.00 ]
+Key: VMOVSLDUPYrm:  [ 0.00  0.00 ]
+Key: VMOVSLDUPYrr:  [ 0.00  0.00 ]
+Key: VMOVSLDUPZ:  [ 0.00  0.00 ]
+Key: VMOVSLDUPZrm:  [ 0.00  0.00 ]
+Key: VMOVSLDUPZrmk:  [ 0.00  0.00 ]
+Key: VMOVSLDUPZrmkz:  [ 0.00  0.00 ]
+Key: VMOVSLDUPZrr:  [ 0.00  0.00 ]
+Key: VMOVSLDUPZrrk:  [ 0.00  0.00 ]
+Key: VMOVSLDUPZrrkz:  [ 0.00  0.00 ]
+Key: VMOVSLDUPrm:  [ 0.00  0.00 ]
+Key: VMOVSLDUPrr:  [ 0.00  0.00 ]
+Key: VMOVSS:  [ 0.00  0.00 ]
+Key: VMOVSSZmr:  [ 0.00  0.00 ]
+Key: VMOVSSZmrk:  [ 0.00  0.00 ]
+Key: VMOVSSZrm:  [ 0.00  0.00 ]
+Key: VMOVSSZrm_alt:  [ 0.00  0.00 ]
+Key: VMOVSSZrmk:  [ 0.00  0.00 ]
+Key: VMOVSSZrmkz:  [ 0.00  0.00 ]
+Key: VMOVSSZrr:  [ 0.00  0.00 ]
+Key: VMOVSSZrr_REV:  [ 0.00  0.00 ]
+Key: VMOVSSZrrk:  [ 0.00  0.00 ]
+Key: VMOVSSZrrk_REV:  [ 0.00  0.00 ]
+Key: VMOVSSZrrkz:  [ 0.00  0.00 ]
+Key: VMOVSSZrrkz_REV:  [ 0.00  0.00 ]
+Key: VMOVSSmr:  [ 0.00  0.00 ]
+Key: VMOVSSrm:  [ 0.00  0.00 ]
+Key: VMOVSSrm_alt:  [ 0.00  0.00 ]
+Key: VMOVSSrr:  [ 0.00  0.00 ]
+Key: VMOVSSrr_REV:  [ 0.00  0.00 ]
+Key: VMOVUPDYmr:  [ 0.00  0.00 ]
+Key: VMOVUPDYrm:  [ 0.00  0.00 ]
+Key: VMOVUPDYrr:  [ 0.00  0.00 ]
+Key: VMOVUPDYrr_REV:  [ 0.00  0.00 ]
+Key: VMOVUPDZ:  [ 0.00  0.00 ]
+Key: VMOVUPDZmr:  [ 0.00  0.00 ]
+Key: VMOVUPDZmrk:  [ 0.00  0.00 ]
+Key: VMOVUPDZrm:  [ 0.00  0.00 ]
+Key: VMOVUPDZrmk:  [ 0.00  0.00 ]
+Key: VMOVUPDZrmkz:  [ 0.00  0.00 ]
+Key: VMOVUPDZrr:  [ 0.00  0.00 ]
+Key: VMOVUPDZrr_REV:  [ 0.00  0.00 ]
+Key: VMOVUPDZrrk:  [ 0.00  0.00 ]
+Key: VMOVUPDZrrk_REV:  [ 0.00  0.00 ]
+Key: VMOVUPDZrrkz:  [ 0.00  0.00 ]
+Key: VMOVUPDZrrkz_REV:  [ 0.00  0.00 ]
+Key: VMOVUPDmr:  [ 0.00  0.00 ]
+Key: VMOVUPDrm:  [ 0.00  0.00 ]
+Key: VMOVUPDrr:  [ 0.00  0.00 ]
+Key: VMOVUPDrr_REV:  [ 0.00  0.00 ]
+Key: VMOVUPSYmr:  [ 0.00  0.00 ]
+Key: VMOVUPSYrm:  [ 0.00  0.00 ]
+Key: VMOVUPSYrr:  [ 0.00  0.00 ]
+Key: VMOVUPSYrr_REV:  [ 0.00  0.00 ]
+Key: VMOVUPSZ:  [ 0.00  0.00 ]
+Key: VMOVUPSZmr:  [ 0.00  0.00 ]
+Key: VMOVUPSZmrk:  [ 0.00  0.00 ]
+Key: VMOVUPSZrm:  [ 0.00  0.00 ]
+Key: VMOVUPSZrmk:  [ 0.00  0.00 ]
+Key: VMOVUPSZrmkz:  [ 0.00  0.00 ]
+Key: VMOVUPSZrr:  [ 0.00  0.00 ]
+Key: VMOVUPSZrr_REV:  [ 0.00  0.00 ]
+Key: VMOVUPSZrrk:  [ 0.00  0.00 ]
+Key: VMOVUPSZrrk_REV:  [ 0.00  0.00 ]
+Key: VMOVUPSZrrkz:  [ 0.00  0.00 ]
+Key: VMOVUPSZrrkz_REV:  [ 0.00  0.00 ]
+Key: VMOVUPSmr:  [ 0.00  0.00 ]
+Key: VMOVUPSrm:  [ 0.00  0.00 ]
+Key: VMOVUPSrr:  [ 0.00  0.00 ]
+Key: VMOVUPSrr_REV:  [ 0.00  0.00 ]
+Key: VMOVW:  [ 0.00  0.00 ]
+Key: VMOVWmr:  [ 0.00  0.00 ]
+Key: VMOVWrm:  [ 0.00  0.00 ]
+Key: VMOVZPDILo:  [ 0.00  0.00 ]
+Key: VMOVZPQILo:  [ 0.00  0.00 ]
+Key: VMOVZPWILo:  [ 0.00  0.00 ]
+Key: VMPSADBWYrmi:  [ 0.00  0.00 ]
+Key: VMPSADBWYrri:  [ 0.00  0.00 ]
+Key: VMPSADBWZ:  [ 0.00  0.00 ]
+Key: VMPSADBWZrmi:  [ 0.00  0.00 ]
+Key: VMPSADBWZrmik:  [ 0.00  0.00 ]
+Key: VMPSADBWZrmikz:  [ 0.00  0.00 ]
+Key: VMPSADBWZrri:  [ 0.00  0.00 ]
+Key: VMPSADBWZrrik:  [ 0.00  0.00 ]
+Key: VMPSADBWZrrikz:  [ 0.00  0.00 ]
+Key: VMPSADBWrmi:  [ 0.00  0.00 ]
+Key: VMPSADBWrri:  [ 0.00  0.00 ]
+Key: VMPTRLDm:  [ 0.00  0.00 ]
+Key: VMPTRSTm:  [ 0.00  0.00 ]
+Key: VMREAD:  [ 0.00  0.00 ]
+Key: VMRESUME:  [ 0.00  0.00 ]
+Key: VMRUN:  [ 0.00  0.00 ]
+Key: VMSAVE:  [ 0.00  0.00 ]
+Key: VMULBF:  [ 0.00  0.00 ]
+Key: VMULPDYrm:  [ 0.00  0.00 ]
+Key: VMULPDYrr:  [ 0.00  0.00 ]
+Key: VMULPDZ:  [ 0.00  0.00 ]
+Key: VMULPDZrm:  [ 0.00  0.00 ]
+Key: VMULPDZrmb:  [ 0.00  0.00 ]
+Key: VMULPDZrmbk:  [ 0.00  0.00 ]
+Key: VMULPDZrmbkz:  [ 0.00  0.00 ]
+Key: VMULPDZrmk:  [ 0.00  0.00 ]
+Key: VMULPDZrmkz:  [ 0.00  0.00 ]
+Key: VMULPDZrr:  [ 0.00  0.00 ]
+Key: VMULPDZrrb:  [ 0.00  0.00 ]
+Key: VMULPDZrrbk:  [ 0.00  0.00 ]
+Key: VMULPDZrrbkz:  [ 0.00  0.00 ]
+Key: VMULPDZrrk:  [ 0.00  0.00 ]
+Key: VMULPDZrrkz:  [ 0.00  0.00 ]
+Key: VMULPDrm:  [ 0.00  0.00 ]
+Key: VMULPDrr:  [ 0.00  0.00 ]
+Key: VMULPHZ:  [ 0.00  0.00 ]
+Key: VMULPHZrm:  [ 0.00  0.00 ]
+Key: VMULPHZrmb:  [ 0.00  0.00 ]
+Key: VMULPHZrmbk:  [ 0.00  0.00 ]
+Key: VMULPHZrmbkz:  [ 0.00  0.00 ]
+Key: VMULPHZrmk:  [ 0.00  0.00 ]
+Key: VMULPHZrmkz:  [ 0.00  0.00 ]
+Key: VMULPHZrr:  [ 0.00  0.00 ]
+Key: VMULPHZrrb:  [ 0.00  0.00 ]
+Key: VMULPHZrrbk:  [ 0.00  0.00 ]
+Key: VMULPHZrrbkz:  [ 0.00  0.00 ]
+Key: VMULPHZrrk:  [ 0.00  0.00 ]
+Key: VMULPHZrrkz:  [ 0.00  0.00 ]
+Key: VMULPSYrm:  [ 0.00  0.00 ]
+Key: VMULPSYrr:  [ 0.00  0.00 ]
+Key: VMULPSZ:  [ 0.00  0.00 ]
+Key: VMULPSZrm:  [ 0.00  0.00 ]
+Key: VMULPSZrmb:  [ 0.00  0.00 ]
+Key: VMULPSZrmbk:  [ 0.00  0.00 ]
+Key: VMULPSZrmbkz:  [ 0.00  0.00 ]
+Key: VMULPSZrmk:  [ 0.00  0.00 ]
+Key: VMULPSZrmkz:  [ 0.00  0.00 ]
+Key: VMULPSZrr:  [ 0.00  0.00 ]
+Key: VMULPSZrrb:  [ 0.00  0.00 ]
+Key: VMULPSZrrbk:  [ 0.00  0.00 ]
+Key: VMULPSZrrbkz:  [ 0.00  0.00 ]
+Key: VMULPSZrrk:  [ 0.00  0.00 ]
+Key: VMULPSZrrkz:  [ 0.00  0.00 ]
+Key: VMULPSrm:  [ 0.00  0.00 ]
+Key: VMULPSrr:  [ 0.00  0.00 ]
+Key: VMULSDZrm:  [ 0.00  0.00 ]
+Key: VMULSDZrm_Int:  [ 0.00  0.00 ]
+Key: VMULSDZrmk_Int:  [ 0.00  0.00 ]
+Key: VMULSDZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMULSDZrr:  [ 0.00  0.00 ]
+Key: VMULSDZrr_Int:  [ 0.00  0.00 ]
+Key: VMULSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VMULSDZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMULSDZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMULSDZrrk_Int:  [ 0.00  0.00 ]
+Key: VMULSDZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMULSDrm:  [ 0.00  0.00 ]
+Key: VMULSDrm_Int:  [ 0.00  0.00 ]
+Key: VMULSDrr:  [ 0.00  0.00 ]
+Key: VMULSDrr_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrm:  [ 0.00  0.00 ]
+Key: VMULSHZrm_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrmk_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrr:  [ 0.00  0.00 ]
+Key: VMULSHZrr_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrrk_Int:  [ 0.00  0.00 ]
+Key: VMULSHZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrm:  [ 0.00  0.00 ]
+Key: VMULSSZrm_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrmk_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrmkz_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrr:  [ 0.00  0.00 ]
+Key: VMULSSZrr_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrrbk_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrrk_Int:  [ 0.00  0.00 ]
+Key: VMULSSZrrkz_Int:  [ 0.00  0.00 ]
+Key: VMULSSrm:  [ 0.00  0.00 ]
+Key: VMULSSrm_Int:  [ 0.00  0.00 ]
+Key: VMULSSrr:  [ 0.00  0.00 ]
+Key: VMULSSrr_Int:  [ 0.00  0.00 ]
+Key: VMWRITE:  [ 0.00  0.00 ]
+Key: VMXOFF:  [ 0.00  0.00 ]
+Key: VMXON:  [ 0.00  0.00 ]
+Key: VORPDYrm:  [ 0.00  0.00 ]
+Key: VORPDYrr:  [ 0.00  0.00 ]
+Key: VORPDZ:  [ 0.00  0.00 ]
+Key: VORPDZrm:  [ 0.00  0.00 ]
+Key: VORPDZrmb:  [ 0.00  0.00 ]
+Key: VORPDZrmbk:  [ 0.00  0.00 ]
+Key: VORPDZrmbkz:  [ 0.00  0.00 ]
+Key: VORPDZrmk:  [ 0.00  0.00 ]
+Key: VORPDZrmkz:  [ 0.00  0.00 ]
+Key: VORPDZrr:  [ 0.00  0.00 ]
+Key: VORPDZrrk:  [ 0.00  0.00 ]
+Key: VORPDZrrkz:  [ 0.00  0.00 ]
+Key: VORPDrm:  [ 0.00  0.00 ]
+Key: VORPDrr:  [ 0.00  0.00 ]
+Key: VORPSYrm:  [ 0.00  0.00 ]
+Key: VORPSYrr:  [ 0.00  0.00 ]
+Key: VORPSZ:  [ 0.00  0.00 ]
+Key: VORPSZrm:  [ 0.00  0.00 ]
+Key: VORPSZrmb:  [ 0.00  0.00 ]
+Key: VORPSZrmbk:  [ 0.00  0.00 ]
+Key: VORPSZrmbkz:  [ 0.00  0.00 ]
+Key: VORPSZrmk:  [ 0.00  0.00 ]
+Key: VORPSZrmkz:  [ 0.00  0.00 ]
+Key: VORPSZrr:  [ 0.00  0.00 ]
+Key: VORPSZrrk:  [ 0.00  0.00 ]
+Key: VORPSZrrkz:  [ 0.00  0.00 ]
+Key: VORPSrm:  [ 0.00  0.00 ]
+Key: VORPSrr:  [ 0.00  0.00 ]
+Key: VP:  [ 0.00  0.00 ]
+Key: VPABSBYrm:  [ 0.00  0.00 ]
+Key: VPABSBYrr:  [ 0.00  0.00 ]
+Key: VPABSBZ:  [ 0.00  0.00 ]
+Key: VPABSBZrm:  [ 0.00  0.00 ]
+Key: VPABSBZrmk:  [ 0.00  0.00 ]
+Key: VPABSBZrmkz:  [ 0.00  0.00 ]
+Key: VPABSBZrr:  [ 0.00  0.00 ]
+Key: VPABSBZrrk:  [ 0.00  0.00 ]
+Key: VPABSBZrrkz:  [ 0.00  0.00 ]
+Key: VPABSBrm:  [ 0.00  0.00 ]
+Key: VPABSBrr:  [ 0.00  0.00 ]
+Key: VPABSDYrm:  [ 0.00  0.00 ]
+Key: VPABSDYrr:  [ 0.00  0.00 ]
+Key: VPABSDZ:  [ 0.00  0.00 ]
+Key: VPABSDZrm:  [ 0.00  0.00 ]
+Key: VPABSDZrmb:  [ 0.00  0.00 ]
+Key: VPABSDZrmbk:  [ 0.00  0.00 ]
+Key: VPABSDZrmbkz:  [ 0.00  0.00 ]
+Key: VPABSDZrmk:  [ 0.00  0.00 ]
+Key: VPABSDZrmkz:  [ 0.00  0.00 ]
+Key: VPABSDZrr:  [ 0.00  0.00 ]
+Key: VPABSDZrrk:  [ 0.00  0.00 ]
+Key: VPABSDZrrkz:  [ 0.00  0.00 ]
+Key: VPABSDrm:  [ 0.00  0.00 ]
+Key: VPABSDrr:  [ 0.00  0.00 ]
+Key: VPABSQZ:  [ 0.00  0.00 ]
+Key: VPABSQZrm:  [ 0.00  0.00 ]
+Key: VPABSQZrmb:  [ 0.00  0.00 ]
+Key: VPABSQZrmbk:  [ 0.00  0.00 ]
+Key: VPABSQZrmbkz:  [ 0.00  0.00 ]
+Key: VPABSQZrmk:  [ 0.00  0.00 ]
+Key: VPABSQZrmkz:  [ 0.00  0.00 ]
+Key: VPABSQZrr:  [ 0.00  0.00 ]
+Key: VPABSQZrrk:  [ 0.00  0.00 ]
+Key: VPABSQZrrkz:  [ 0.00  0.00 ]
+Key: VPABSWYrm:  [ 0.00  0.00 ]
+Key: VPABSWYrr:  [ 0.00  0.00 ]
+Key: VPABSWZ:  [ 0.00  0.00 ]
+Key: VPABSWZrm:  [ 0.00  0.00 ]
+Key: VPABSWZrmk:  [ 0.00  0.00 ]
+Key: VPABSWZrmkz:  [ 0.00  0.00 ]
+Key: VPABSWZrr:  [ 0.00  0.00 ]
+Key: VPABSWZrrk:  [ 0.00  0.00 ]
+Key: VPABSWZrrkz:  [ 0.00  0.00 ]
+Key: VPABSWrm:  [ 0.00  0.00 ]
+Key: VPABSWrr:  [ 0.00  0.00 ]
+Key: VPACKSSDWYrm:  [ 0.00  0.00 ]
+Key: VPACKSSDWYrr:  [ 0.00  0.00 ]
+Key: VPACKSSDWZ:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrm:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrmb:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrmbk:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrmbkz:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrmk:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrmkz:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrr:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrrk:  [ 0.00  0.00 ]
+Key: VPACKSSDWZrrkz:  [ 0.00  0.00 ]
+Key: VPACKSSDWrm:  [ 0.00  0.00 ]
+Key: VPACKSSDWrr:  [ 0.00  0.00 ]
+Key: VPACKSSWBYrm:  [ 0.00  0.00 ]
+Key: VPACKSSWBYrr:  [ 0.00  0.00 ]
+Key: VPACKSSWBZ:  [ 0.00  0.00 ]
+Key: VPACKSSWBZrm:  [ 0.00  0.00 ]
+Key: VPACKSSWBZrmk:  [ 0.00  0.00 ]
+Key: VPACKSSWBZrmkz:  [ 0.00  0.00 ]
+Key: VPACKSSWBZrr:  [ 0.00  0.00 ]
+Key: VPACKSSWBZrrk:  [ 0.00  0.00 ]
+Key: VPACKSSWBZrrkz:  [ 0.00  0.00 ]
+Key: VPACKSSWBrm:  [ 0.00  0.00 ]
+Key: VPACKSSWBrr:  [ 0.00  0.00 ]
+Key: VPACKUSDWYrm:  [ 0.00  0.00 ]
+Key: VPACKUSDWYrr:  [ 0.00  0.00 ]
+Key: VPACKUSDWZ:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrm:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrmb:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrmbk:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrmbkz:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrmk:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrmkz:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrr:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrrk:  [ 0.00  0.00 ]
+Key: VPACKUSDWZrrkz:  [ 0.00  0.00 ]
+Key: VPACKUSDWrm:  [ 0.00  0.00 ]
+Key: VPACKUSDWrr:  [ 0.00  0.00 ]
+Key: VPACKUSWBYrm:  [ 0.00  0.00 ]
+Key: VPACKUSWBYrr:  [ 0.00  0.00 ]
+Key: VPACKUSWBZ:  [ 0.00  0.00 ]
+Key: VPACKUSWBZrm:  [ 0.00  0.00 ]
+Key: VPACKUSWBZrmk:  [ 0.00  0.00 ]
+Key: VPACKUSWBZrmkz:  [ 0.00  0.00 ]
+Key: VPACKUSWBZrr:  [ 0.00  0.00 ]
+Key: VPACKUSWBZrrk:  [ 0.00  0.00 ]
+Key: VPACKUSWBZrrkz:  [ 0.00  0.00 ]
+Key: VPACKUSWBrm:  [ 0.00  0.00 ]
+Key: VPACKUSWBrr:  [ 0.00  0.00 ]
+Key: VPADDBYrm:  [ 0.00  0.00 ]
+Key: VPADDBYrr:  [ 0.00  0.00 ]
+Key: VPADDBZ:  [ 0.00  0.00 ]
+Key: VPADDBZrm:  [ 0.00  0.00 ]
+Key: VPADDBZrmk:  [ 0.00  0.00 ]
+Key: VPADDBZrmkz:  [ 0.00  0.00 ]
+Key: VPADDBZrr:  [ 0.00  0.00 ]
+Key: VPADDBZrrk:  [ 0.00  0.00 ]
+Key: VPADDBZrrkz:  [ 0.00  0.00 ]
+Key: VPADDBrm:  [ 0.00  0.00 ]
+Key: VPADDBrr:  [ 0.00  0.00 ]
+Key: VPADDDYrm:  [ 0.00  0.00 ]
+Key: VPADDDYrr:  [ 0.00  0.00 ]
+Key: VPADDDZ:  [ 0.00  0.00 ]
+Key: VPADDDZrm:  [ 0.00  0.00 ]
+Key: VPADDDZrmb:  [ 0.00  0.00 ]
+Key: VPADDDZrmbk:  [ 0.00  0.00 ]
+Key: VPADDDZrmbkz:  [ 0.00  0.00 ]
+Key: VPADDDZrmk:  [ 0.00  0.00 ]
+Key: VPADDDZrmkz:  [ 0.00  0.00 ]
+Key: VPADDDZrr:  [ 0.00  0.00 ]
+Key: VPADDDZrrk:  [ 0.00  0.00 ]
+Key: VPADDDZrrkz:  [ 0.00  0.00 ]
+Key: VPADDDrm:  [ 0.00  0.00 ]
+Key: VPADDDrr:  [ 0.00  0.00 ]
+Key: VPADDQYrm:  [ 0.00  0.00 ]
+Key: VPADDQYrr:  [ 0.00  0.00 ]
+Key: VPADDQZ:  [ 0.00  0.00 ]
+Key: VPADDQZrm:  [ 0.00  0.00 ]
+Key: VPADDQZrmb:  [ 0.00  0.00 ]
+Key: VPADDQZrmbk:  [ 0.00  0.00 ]
+Key: VPADDQZrmbkz:  [ 0.00  0.00 ]
+Key: VPADDQZrmk:  [ 0.00  0.00 ]
+Key: VPADDQZrmkz:  [ 0.00  0.00 ]
+Key: VPADDQZrr:  [ 0.00  0.00 ]
+Key: VPADDQZrrk:  [ 0.00  0.00 ]
+Key: VPADDQZrrkz:  [ 0.00  0.00 ]
+Key: VPADDQrm:  [ 0.00  0.00 ]
+Key: VPADDQrr:  [ 0.00  0.00 ]
+Key: VPADDSBYrm:  [ 0.00  0.00 ]
+Key: VPADDSBYrr:  [ 0.00  0.00 ]
+Key: VPADDSBZ:  [ 0.00  0.00 ]
+Key: VPADDSBZrm:  [ 0.00  0.00 ]
+Key: VPADDSBZrmk:  [ 0.00  0.00 ]
+Key: VPADDSBZrmkz:  [ 0.00  0.00 ]
+Key: VPADDSBZrr:  [ 0.00  0.00 ]
+Key: VPADDSBZrrk:  [ 0.00  0.00 ]
+Key: VPADDSBZrrkz:  [ 0.00  0.00 ]
+Key: VPADDSBrm:  [ 0.00  0.00 ]
+Key: VPADDSBrr:  [ 0.00  0.00 ]
+Key: VPADDSWYrm:  [ 0.00  0.00 ]
+Key: VPADDSWYrr:  [ 0.00  0.00 ]
+Key: VPADDSWZ:  [ 0.00  0.00 ]
+Key: VPADDSWZrm:  [ 0.00  0.00 ]
+Key: VPADDSWZrmk:  [ 0.00  0.00 ]
+Key: VPADDSWZrmkz:  [ 0.00  0.00 ]
+Key: VPADDSWZrr:  [ 0.00  0.00 ]
+Key: VPADDSWZrrk:  [ 0.00  0.00 ]
+Key: VPADDSWZrrkz:  [ 0.00  0.00 ]
+Key: VPADDSWrm:  [ 0.00  0.00 ]
+Key: VPADDSWrr:  [ 0.00  0.00 ]
+Key: VPADDUSBYrm:  [ 0.00  0.00 ]
+Key: VPADDUSBYrr:  [ 0.00  0.00 ]
+Key: VPADDUSBZ:  [ 0.00  0.00 ]
+Key: VPADDUSBZrm:  [ 0.00  0.00 ]
+Key: VPADDUSBZrmk:  [ 0.00  0.00 ]
+Key: VPADDUSBZrmkz:  [ 0.00  0.00 ]
+Key: VPADDUSBZrr:  [ 0.00  0.00 ]
+Key: VPADDUSBZrrk:  [ 0.00  0.00 ]
+Key: VPADDUSBZrrkz:  [ 0.00  0.00 ]
+Key: VPADDUSBrm:  [ 0.00  0.00 ]
+Key: VPADDUSBrr:  [ 0.00  0.00 ]
+Key: VPADDUSWYrm:  [ 0.00  0.00 ]
+Key: VPADDUSWYrr:  [ 0.00  0.00 ]
+Key: VPADDUSWZ:  [ 0.00  0.00 ]
+Key: VPADDUSWZrm:  [ 0.00  0.00 ]
+Key: VPADDUSWZrmk:  [ 0.00  0.00 ]
+Key: VPADDUSWZrmkz:  [ 0.00  0.00 ]
+Key: VPADDUSWZrr:  [ 0.00  0.00 ]
+Key: VPADDUSWZrrk:  [ 0.00  0.00 ]
+Key: VPADDUSWZrrkz:  [ 0.00  0.00 ]
+Key: VPADDUSWrm:  [ 0.00  0.00 ]
+Key: VPADDUSWrr:  [ 0.00  0.00 ]
+Key: VPADDWYrm:  [ 0.00  0.00 ]
+Key: VPADDWYrr:  [ 0.00  0.00 ]
+Key: VPADDWZ:  [ 0.00  0.00 ]
+Key: VPADDWZrm:  [ 0.00  0.00 ]
+Key: VPADDWZrmk:  [ 0.00  0.00 ]
+Key: VPADDWZrmkz:  [ 0.00  0.00 ]
+Key: VPADDWZrr:  [ 0.00  0.00 ]
+Key: VPADDWZrrk:  [ 0.00  0.00 ]
+Key: VPADDWZrrkz:  [ 0.00  0.00 ]
+Key: VPADDWrm:  [ 0.00  0.00 ]
+Key: VPADDWrr:  [ 0.00  0.00 ]
+Key: VPALIGNRYrmi:  [ 0.00  0.00 ]
+Key: VPALIGNRYrri:  [ 0.00  0.00 ]
+Key: VPALIGNRZ:  [ 0.00  0.00 ]
+Key: VPALIGNRZrmi:  [ 0.00  0.00 ]
+Key: VPALIGNRZrmik:  [ 0.00  0.00 ]
+Key: VPALIGNRZrmikz:  [ 0.00  0.00 ]
+Key: VPALIGNRZrri:  [ 0.00  0.00 ]
+Key: VPALIGNRZrrik:  [ 0.00  0.00 ]
+Key: VPALIGNRZrrikz:  [ 0.00  0.00 ]
+Key: VPALIGNRrmi:  [ 0.00  0.00 ]
+Key: VPALIGNRrri:  [ 0.00  0.00 ]
+Key: VPANDDZ:  [ 0.00  0.00 ]
+Key: VPANDDZrm:  [ 0.00  0.00 ]
+Key: VPANDDZrmb:  [ 0.00  0.00 ]
+Key: VPANDDZrmbk:  [ 0.00  0.00 ]
+Key: VPANDDZrmbkz:  [ 0.00  0.00 ]
+Key: VPANDDZrmk:  [ 0.00  0.00 ]
+Key: VPANDDZrmkz:  [ 0.00  0.00 ]
+Key: VPANDDZrr:  [ 0.00  0.00 ]
+Key: VPANDDZrrk:  [ 0.00  0.00 ]
+Key: VPANDDZrrkz:  [ 0.00  0.00 ]
+Key: VPANDNDZ:  [ 0.00  0.00 ]
+Key: VPANDNDZrm:  [ 0.00  0.00 ]
+Key: VPANDNDZrmb:  [ 0.00  0.00 ]
+Key: VPANDNDZrmbk:  [ 0.00  0.00 ]
+Key: VPANDNDZrmbkz:  [ 0.00  0.00 ]
+Key: VPANDNDZrmk:  [ 0.00  0.00 ]
+Key: VPANDNDZrmkz:  [ 0.00  0.00 ]
+Key: VPANDNDZrr:  [ 0.00  0.00 ]
+Key: VPANDNDZrrk:  [ 0.00  0.00 ]
+Key: VPANDNDZrrkz:  [ 0.00  0.00 ]
+Key: VPANDNQZ:  [ 0.00  0.00 ]
+Key: VPANDNQZrm:  [ 0.00  0.00 ]
+Key: VPANDNQZrmb:  [ 0.00  0.00 ]
+Key: VPANDNQZrmbk:  [ 0.00  0.00 ]
+Key: VPANDNQZrmbkz:  [ 0.00  0.00 ]
+Key: VPANDNQZrmk:  [ 0.00  0.00 ]
+Key: VPANDNQZrmkz:  [ 0.00  0.00 ]
+Key: VPANDNQZrr:  [ 0.00  0.00 ]
+Key: VPANDNQZrrk:  [ 0.00  0.00 ]
+Key: VPANDNQZrrkz:  [ 0.00  0.00 ]
+Key: VPANDNYrm:  [ 0.00  0.00 ]
+Key: VPANDNYrr:  [ 0.00  0.00 ]
+Key: VPANDNrm:  [ 0.00  0.00 ]
+Key: VPANDNrr:  [ 0.00  0.00 ]
+Key: VPANDQZ:  [ 0.00  0.00 ]
+Key: VPANDQZrm:  [ 0.00  0.00 ]
+Key: VPANDQZrmb:  [ 0.00  0.00 ]
+Key: VPANDQZrmbk:  [ 0.00  0.00 ]
+Key: VPANDQZrmbkz:  [ 0.00  0.00 ]
+Key: VPANDQZrmk:  [ 0.00  0.00 ]
+Key: VPANDQZrmkz:  [ 0.00  0.00 ]
+Key: VPANDQZrr:  [ 0.00  0.00 ]
+Key: VPANDQZrrk:  [ 0.00  0.00 ]
+Key: VPANDQZrrkz:  [ 0.00  0.00 ]
+Key: VPANDYrm:  [ 0.00  0.00 ]
+Key: VPANDYrr:  [ 0.00  0.00 ]
+Key: VPANDrm:  [ 0.00  0.00 ]
+Key: VPANDrr:  [ 0.00  0.00 ]
+Key: VPAVGBYrm:  [ 0.00  0.00 ]
+Key: VPAVGBYrr:  [ 0.00  0.00 ]
+Key: VPAVGBZ:  [ 0.00  0.00 ]
+Key: VPAVGBZrm:  [ 0.00  0.00 ]
+Key: VPAVGBZrmk:  [ 0.00  0.00 ]
+Key: VPAVGBZrmkz:  [ 0.00  0.00 ]
+Key: VPAVGBZrr:  [ 0.00  0.00 ]
+Key: VPAVGBZrrk:  [ 0.00  0.00 ]
+Key: VPAVGBZrrkz:  [ 0.00  0.00 ]
+Key: VPAVGBrm:  [ 0.00  0.00 ]
+Key: VPAVGBrr:  [ 0.00  0.00 ]
+Key: VPAVGWYrm:  [ 0.00  0.00 ]
+Key: VPAVGWYrr:  [ 0.00  0.00 ]
+Key: VPAVGWZ:  [ 0.00  0.00 ]
+Key: VPAVGWZrm:  [ 0.00  0.00 ]
+Key: VPAVGWZrmk:  [ 0.00  0.00 ]
+Key: VPAVGWZrmkz:  [ 0.00  0.00 ]
+Key: VPAVGWZrr:  [ 0.00  0.00 ]
+Key: VPAVGWZrrk:  [ 0.00  0.00 ]
+Key: VPAVGWZrrkz:  [ 0.00  0.00 ]
+Key: VPAVGWrm:  [ 0.00  0.00 ]
+Key: VPAVGWrr:  [ 0.00  0.00 ]
+Key: VPBLENDDYrmi:  [ 0.00  0.00 ]
+Key: VPBLENDDYrri:  [ 0.00  0.00 ]
+Key: VPBLENDDrmi:  [ 0.00  0.00 ]
+Key: VPBLENDDrri:  [ 0.00  0.00 ]
+Key: VPBLENDMBZ:  [ 0.00  0.00 ]
+Key: VPBLENDMBZrm:  [ 0.00  0.00 ]
+Key: VPBLENDMBZrmk:  [ 0.00  0.00 ]
+Key: VPBLENDMBZrmkz:  [ 0.00  0.00 ]
+Key: VPBLENDMBZrr:  [ 0.00  0.00 ]
+Key: VPBLENDMBZrrk:  [ 0.00  0.00 ]
+Key: VPBLENDMBZrrkz:  [ 0.00  0.00 ]
+Key: VPBLENDMDZ:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrm:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrmb:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrmbk:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrmbkz:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrmk:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrmkz:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrr:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrrk:  [ 0.00  0.00 ]
+Key: VPBLENDMDZrrkz:  [ 0.00  0.00 ]
+Key: VPBLENDMQZ:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrm:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrmb:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrmbk:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrmbkz:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrmk:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrmkz:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrr:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrrk:  [ 0.00  0.00 ]
+Key: VPBLENDMQZrrkz:  [ 0.00  0.00 ]
+Key: VPBLENDMWZ:  [ 0.00  0.00 ]
+Key: VPBLENDMWZrm:  [ 0.00  0.00 ]
+Key: VPBLENDMWZrmk:  [ 0.00  0.00 ]
+Key: VPBLENDMWZrmkz:  [ 0.00  0.00 ]
+Key: VPBLENDMWZrr:  [ 0.00  0.00 ]
+Key: VPBLENDMWZrrk:  [ 0.00  0.00 ]
+Key: VPBLENDMWZrrkz:  [ 0.00  0.00 ]
+Key: VPBLENDVBYrmr:  [ 0.00  0.00 ]
+Key: VPBLENDVBYrrr:  [ 0.00  0.00 ]
+Key: VPBLENDVBrmr:  [ 0.00  0.00 ]
+Key: VPBLENDVBrrr:  [ 0.00  0.00 ]
+Key: VPBLENDWYrmi:  [ 0.00  0.00 ]
+Key: VPBLENDWYrri:  [ 0.00  0.00 ]
+Key: VPBLENDWrmi:  [ 0.00  0.00 ]
+Key: VPBLENDWrri:  [ 0.00  0.00 ]
+Key: VPBROADCASTBYrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTBYrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTBZ:  [ 0.00  0.00 ]
+Key: VPBROADCASTBZrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTBZrmk:  [ 0.00  0.00 ]
+Key: VPBROADCASTBZrmkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTBZrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTBZrrk:  [ 0.00  0.00 ]
+Key: VPBROADCASTBZrrkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTBrZ:  [ 0.00  0.00 ]
+Key: VPBROADCASTBrZrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTBrZrrk:  [ 0.00  0.00 ]
+Key: VPBROADCASTBrZrrkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTBrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTBrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTDYrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTDYrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTDZ:  [ 0.00  0.00 ]
+Key: VPBROADCASTDZrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTDZrmk:  [ 0.00  0.00 ]
+Key: VPBROADCASTDZrmkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTDZrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTDZrrk:  [ 0.00  0.00 ]
+Key: VPBROADCASTDZrrkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTDrZ:  [ 0.00  0.00 ]
+Key: VPBROADCASTDrZrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTDrZrrk:  [ 0.00  0.00 ]
+Key: VPBROADCASTDrZrrkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTDrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTDrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTMB:  [ 0.00  0.00 ]
+Key: VPBROADCASTMW:  [ 0.00  0.00 ]
+Key: VPBROADCASTQYrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTQYrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTQZ:  [ 0.00  0.00 ]
+Key: VPBROADCASTQZrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTQZrmk:  [ 0.00  0.00 ]
+Key: VPBROADCASTQZrmkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTQZrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTQZrrk:  [ 0.00  0.00 ]
+Key: VPBROADCASTQZrrkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTQrZ:  [ 0.00  0.00 ]
+Key: VPBROADCASTQrZrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTQrZrrk:  [ 0.00  0.00 ]
+Key: VPBROADCASTQrZrrkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTQrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTQrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTWYrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTWYrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTWZ:  [ 0.00  0.00 ]
+Key: VPBROADCASTWZrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTWZrmk:  [ 0.00  0.00 ]
+Key: VPBROADCASTWZrmkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTWZrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTWZrrk:  [ 0.00  0.00 ]
+Key: VPBROADCASTWZrrkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTWrZ:  [ 0.00  0.00 ]
+Key: VPBROADCASTWrZrr:  [ 0.00  0.00 ]
+Key: VPBROADCASTWrZrrk:  [ 0.00  0.00 ]
+Key: VPBROADCASTWrZrrkz:  [ 0.00  0.00 ]
+Key: VPBROADCASTWrm:  [ 0.00  0.00 ]
+Key: VPBROADCASTWrr:  [ 0.00  0.00 ]
+Key: VPCLMULQDQYrmi:  [ 0.00  0.00 ]
+Key: VPCLMULQDQYrri:  [ 0.00  0.00 ]
+Key: VPCLMULQDQZ:  [ 0.00  0.00 ]
+Key: VPCLMULQDQZrmi:  [ 0.00  0.00 ]
+Key: VPCLMULQDQZrri:  [ 0.00  0.00 ]
+Key: VPCLMULQDQrmi:  [ 0.00  0.00 ]
+Key: VPCLMULQDQrri:  [ 0.00  0.00 ]
+Key: VPCMOVYrmr:  [ 0.00  0.00 ]
+Key: VPCMOVYrrm:  [ 0.00  0.00 ]
+Key: VPCMOVYrrr:  [ 0.00  0.00 ]
+Key: VPCMOVYrrr_REV:  [ 0.00  0.00 ]
+Key: VPCMOVrmr:  [ 0.00  0.00 ]
+Key: VPCMOVrrm:  [ 0.00  0.00 ]
+Key: VPCMOVrrr:  [ 0.00  0.00 ]
+Key: VPCMOVrrr_REV:  [ 0.00  0.00 ]
+Key: VPCMPBZ:  [ 0.00  0.00 ]
+Key: VPCMPBZrmi:  [ 0.00  0.00 ]
+Key: VPCMPBZrmik:  [ 0.00  0.00 ]
+Key: VPCMPBZrri:  [ 0.00  0.00 ]
+Key: VPCMPBZrrik:  [ 0.00  0.00 ]
+Key: VPCMPDZ:  [ 0.00  0.00 ]
+Key: VPCMPDZrmbi:  [ 0.00  0.00 ]
+Key: VPCMPDZrmbik:  [ 0.00  0.00 ]
+Key: VPCMPDZrmi:  [ 0.00  0.00 ]
+Key: VPCMPDZrmik:  [ 0.00  0.00 ]
+Key: VPCMPDZrri:  [ 0.00  0.00 ]
+Key: VPCMPDZrrik:  [ 0.00  0.00 ]
+Key: VPCMPEQBYrm:  [ 0.00  0.00 ]
+Key: VPCMPEQBYrr:  [ 0.00  0.00 ]
+Key: VPCMPEQBZ:  [ 0.00  0.00 ]
+Key: VPCMPEQBZrm:  [ 0.00  0.00 ]
+Key: VPCMPEQBZrmk:  [ 0.00  0.00 ]
+Key: VPCMPEQBZrr:  [ 0.00  0.00 ]
+Key: VPCMPEQBZrrk:  [ 0.00  0.00 ]
+Key: VPCMPEQBrm:  [ 0.00  0.00 ]
+Key: VPCMPEQBrr:  [ 0.00  0.00 ]
+Key: VPCMPEQDYrm:  [ 0.00  0.00 ]
+Key: VPCMPEQDYrr:  [ 0.00  0.00 ]
+Key: VPCMPEQDZ:  [ 0.00  0.00 ]
+Key: VPCMPEQDZrm:  [ 0.00  0.00 ]
+Key: VPCMPEQDZrmb:  [ 0.00  0.00 ]
+Key: VPCMPEQDZrmbk:  [ 0.00  0.00 ]
+Key: VPCMPEQDZrmk:  [ 0.00  0.00 ]
+Key: VPCMPEQDZrr:  [ 0.00  0.00 ]
+Key: VPCMPEQDZrrk:  [ 0.00  0.00 ]
+Key: VPCMPEQDrm:  [ 0.00  0.00 ]
+Key: VPCMPEQDrr:  [ 0.00  0.00 ]
+Key: VPCMPEQQYrm:  [ 0.00  0.00 ]
+Key: VPCMPEQQYrr:  [ 0.00  0.00 ]
+Key: VPCMPEQQZ:  [ 0.00  0.00 ]
+Key: VPCMPEQQZrm:  [ 0.00  0.00 ]
+Key: VPCMPEQQZrmb:  [ 0.00  0.00 ]
+Key: VPCMPEQQZrmbk:  [ 0.00  0.00 ]
+Key: VPCMPEQQZrmk:  [ 0.00  0.00 ]
+Key: VPCMPEQQZrr:  [ 0.00  0.00 ]
+Key: VPCMPEQQZrrk:  [ 0.00  0.00 ]
+Key: VPCMPEQQrm:  [ 0.00  0.00 ]
+Key: VPCMPEQQrr:  [ 0.00  0.00 ]
+Key: VPCMPEQWYrm:  [ 0.00  0.00 ]
+Key: VPCMPEQWYrr:  [ 0.00  0.00 ]
+Key: VPCMPEQWZ:  [ 0.00  0.00 ]
+Key: VPCMPEQWZrm:  [ 0.00  0.00 ]
+Key: VPCMPEQWZrmk:  [ 0.00  0.00 ]
+Key: VPCMPEQWZrr:  [ 0.00  0.00 ]
+Key: VPCMPEQWZrrk:  [ 0.00  0.00 ]
+Key: VPCMPEQWrm:  [ 0.00  0.00 ]
+Key: VPCMPEQWrr:  [ 0.00  0.00 ]
+Key: VPCMPESTRIrmi:  [ 0.00  0.00 ]
+Key: VPCMPESTRIrri:  [ 0.00  0.00 ]
+Key: VPCMPESTRMrmi:  [ 0.00  0.00 ]
+Key: VPCMPESTRMrri:  [ 0.00  0.00 ]
+Key: VPCMPGTBYrm:  [ 0.00  0.00 ]
+Key: VPCMPGTBYrr:  [ 0.00  0.00 ]
+Key: VPCMPGTBZ:  [ 0.00  0.00 ]
+Key: VPCMPGTBZrm:  [ 0.00  0.00 ]
+Key: VPCMPGTBZrmk:  [ 0.00  0.00 ]
+Key: VPCMPGTBZrr:  [ 0.00  0.00 ]
+Key: VPCMPGTBZrrk:  [ 0.00  0.00 ]
+Key: VPCMPGTBrm:  [ 0.00  0.00 ]
+Key: VPCMPGTBrr:  [ 0.00  0.00 ]
+Key: VPCMPGTDYrm:  [ 0.00  0.00 ]
+Key: VPCMPGTDYrr:  [ 0.00  0.00 ]
+Key: VPCMPGTDZ:  [ 0.00  0.00 ]
+Key: VPCMPGTDZrm:  [ 0.00  0.00 ]
+Key: VPCMPGTDZrmb:  [ 0.00  0.00 ]
+Key: VPCMPGTDZrmbk:  [ 0.00  0.00 ]
+Key: VPCMPGTDZrmk:  [ 0.00  0.00 ]
+Key: VPCMPGTDZrr:  [ 0.00  0.00 ]
+Key: VPCMPGTDZrrk:  [ 0.00  0.00 ]
+Key: VPCMPGTDrm:  [ 0.00  0.00 ]
+Key: VPCMPGTDrr:  [ 0.00  0.00 ]
+Key: VPCMPGTQYrm:  [ 0.00  0.00 ]
+Key: VPCMPGTQYrr:  [ 0.00  0.00 ]
+Key: VPCMPGTQZ:  [ 0.00  0.00 ]
+Key: VPCMPGTQZrm:  [ 0.00  0.00 ]
+Key: VPCMPGTQZrmb:  [ 0.00  0.00 ]
+Key: VPCMPGTQZrmbk:  [ 0.00  0.00 ]
+Key: VPCMPGTQZrmk:  [ 0.00  0.00 ]
+Key: VPCMPGTQZrr:  [ 0.00  0.00 ]
+Key: VPCMPGTQZrrk:  [ 0.00  0.00 ]
+Key: VPCMPGTQrm:  [ 0.00  0.00 ]
+Key: VPCMPGTQrr:  [ 0.00  0.00 ]
+Key: VPCMPGTWYrm:  [ 0.00  0.00 ]
+Key: VPCMPGTWYrr:  [ 0.00  0.00 ]
+Key: VPCMPGTWZ:  [ 0.00  0.00 ]
+Key: VPCMPGTWZrm:  [ 0.00  0.00 ]
+Key: VPCMPGTWZrmk:  [ 0.00  0.00 ]
+Key: VPCMPGTWZrr:  [ 0.00  0.00 ]
+Key: VPCMPGTWZrrk:  [ 0.00  0.00 ]
+Key: VPCMPGTWrm:  [ 0.00  0.00 ]
+Key: VPCMPGTWrr:  [ 0.00  0.00 ]
+Key: VPCMPISTRIrmi:  [ 0.00  0.00 ]
+Key: VPCMPISTRIrri:  [ 0.00  0.00 ]
+Key: VPCMPISTRMrmi:  [ 0.00  0.00 ]
+Key: VPCMPISTRMrri:  [ 0.00  0.00 ]
+Key: VPCMPQZ:  [ 0.00  0.00 ]
+Key: VPCMPQZrmbi:  [ 0.00  0.00 ]
+Key: VPCMPQZrmbik:  [ 0.00  0.00 ]
+Key: VPCMPQZrmi:  [ 0.00  0.00 ]
+Key: VPCMPQZrmik:  [ 0.00  0.00 ]
+Key: VPCMPQZrri:  [ 0.00  0.00 ]
+Key: VPCMPQZrrik:  [ 0.00  0.00 ]
+Key: VPCMPUBZ:  [ 0.00  0.00 ]
+Key: VPCMPUBZrmi:  [ 0.00  0.00 ]
+Key: VPCMPUBZrmik:  [ 0.00  0.00 ]
+Key: VPCMPUBZrri:  [ 0.00  0.00 ]
+Key: VPCMPUBZrrik:  [ 0.00  0.00 ]
+Key: VPCMPUDZ:  [ 0.00  0.00 ]
+Key: VPCMPUDZrmbi:  [ 0.00  0.00 ]
+Key: VPCMPUDZrmbik:  [ 0.00  0.00 ]
+Key: VPCMPUDZrmi:  [ 0.00  0.00 ]
+Key: VPCMPUDZrmik:  [ 0.00  0.00 ]
+Key: VPCMPUDZrri:  [ 0.00  0.00 ]
+Key: VPCMPUDZrrik:  [ 0.00  0.00 ]
+Key: VPCMPUQZ:  [ 0.00  0.00 ]
+Key: VPCMPUQZrmbi:  [ 0.00  0.00 ]
+Key: VPCMPUQZrmbik:  [ 0.00  0.00 ]
+Key: VPCMPUQZrmi:  [ 0.00  0.00 ]
+Key: VPCMPUQZrmik:  [ 0.00  0.00 ]
+Key: VPCMPUQZrri:  [ 0.00  0.00 ]
+Key: VPCMPUQZrrik:  [ 0.00  0.00 ]
+Key: VPCMPUWZ:  [ 0.00  0.00 ]
+Key: VPCMPUWZrmi:  [ 0.00  0.00 ]
+Key: VPCMPUWZrmik:  [ 0.00  0.00 ]
+Key: VPCMPUWZrri:  [ 0.00  0.00 ]
+Key: VPCMPUWZrrik:  [ 0.00  0.00 ]
+Key: VPCMPWZ:  [ 0.00  0.00 ]
+Key: VPCMPWZrmi:  [ 0.00  0.00 ]
+Key: VPCMPWZrmik:  [ 0.00  0.00 ]
+Key: VPCMPWZrri:  [ 0.00  0.00 ]
+Key: VPCMPWZrrik:  [ 0.00  0.00 ]
+Key: VPCOMBmi:  [ 0.00  0.00 ]
+Key: VPCOMBri:  [ 0.00  0.00 ]
+Key: VPCOMDmi:  [ 0.00  0.00 ]
+Key: VPCOMDri:  [ 0.00  0.00 ]
+Key: VPCOMPRESSBZ:  [ 0.00  0.00 ]
+Key: VPCOMPRESSBZmr:  [ 0.00  0.00 ]
+Key: VPCOMPRESSBZmrk:  [ 0.00  0.00 ]
+Key: VPCOMPRESSBZrr:  [ 0.00  0.00 ]
+Key: VPCOMPRESSBZrrk:  [ 0.00  0.00 ]
+Key: VPCOMPRESSBZrrkz:  [ 0.00  0.00 ]
+Key: VPCOMPRESSDZ:  [ 0.00  0.00 ]
+Key: VPCOMPRESSDZmr:  [ 0.00  0.00 ]
+Key: VPCOMPRESSDZmrk:  [ 0.00  0.00 ]
+Key: VPCOMPRESSDZrr:  [ 0.00  0.00 ]
+Key: VPCOMPRESSDZrrk:  [ 0.00  0.00 ]
+Key: VPCOMPRESSDZrrkz:  [ 0.00  0.00 ]
+Key: VPCOMPRESSQZ:  [ 0.00  0.00 ]
+Key: VPCOMPRESSQZmr:  [ 0.00  0.00 ]
+Key: VPCOMPRESSQZmrk:  [ 0.00  0.00 ]
+Key: VPCOMPRESSQZrr:  [ 0.00  0.00 ]
+Key: VPCOMPRESSQZrrk:  [ 0.00  0.00 ]
+Key: VPCOMPRESSQZrrkz:  [ 0.00  0.00 ]
+Key: VPCOMPRESSWZ:  [ 0.00  0.00 ]
+Key: VPCOMPRESSWZmr:  [ 0.00  0.00 ]
+Key: VPCOMPRESSWZmrk:  [ 0.00  0.00 ]
+Key: VPCOMPRESSWZrr:  [ 0.00  0.00 ]
+Key: VPCOMPRESSWZrrk:  [ 0.00  0.00 ]
+Key: VPCOMPRESSWZrrkz:  [ 0.00  0.00 ]
+Key: VPCOMQmi:  [ 0.00  0.00 ]
+Key: VPCOMQri:  [ 0.00  0.00 ]
+Key: VPCOMUBmi:  [ 0.00  0.00 ]
+Key: VPCOMUBri:  [ 0.00  0.00 ]
+Key: VPCOMUDmi:  [ 0.00  0.00 ]
+Key: VPCOMUDri:  [ 0.00  0.00 ]
+Key: VPCOMUQmi:  [ 0.00  0.00 ]
+Key: VPCOMUQri:  [ 0.00  0.00 ]
+Key: VPCOMUWmi:  [ 0.00  0.00 ]
+Key: VPCOMUWri:  [ 0.00  0.00 ]
+Key: VPCOMWmi:  [ 0.00  0.00 ]
+Key: VPCOMWri:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZ:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrm:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrmb:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrmbk:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrmbkz:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrmk:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrmkz:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrr:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrrk:  [ 0.00  0.00 ]
+Key: VPCONFLICTDZrrkz:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZ:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrm:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrmb:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrmbk:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrmbkz:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrmk:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrmkz:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrr:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrrk:  [ 0.00  0.00 ]
+Key: VPCONFLICTQZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBSSDSYrm:  [ 0.00  0.00 ]
+Key: VPDPBSSDSYrr:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZ:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrm:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrmb:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrmbk:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrmk:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrmkz:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrr:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrrk:  [ 0.00  0.00 ]
+Key: VPDPBSSDSZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBSSDSrm:  [ 0.00  0.00 ]
+Key: VPDPBSSDSrr:  [ 0.00  0.00 ]
+Key: VPDPBSSDYrm:  [ 0.00  0.00 ]
+Key: VPDPBSSDYrr:  [ 0.00  0.00 ]
+Key: VPDPBSSDZ:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrm:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrmb:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrmbk:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrmk:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrmkz:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrr:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrrk:  [ 0.00  0.00 ]
+Key: VPDPBSSDZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBSSDrm:  [ 0.00  0.00 ]
+Key: VPDPBSSDrr:  [ 0.00  0.00 ]
+Key: VPDPBSUDSYrm:  [ 0.00  0.00 ]
+Key: VPDPBSUDSYrr:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZ:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrm:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrmb:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrmbk:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrmk:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrmkz:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrr:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrrk:  [ 0.00  0.00 ]
+Key: VPDPBSUDSZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBSUDSrm:  [ 0.00  0.00 ]
+Key: VPDPBSUDSrr:  [ 0.00  0.00 ]
+Key: VPDPBSUDYrm:  [ 0.00  0.00 ]
+Key: VPDPBSUDYrr:  [ 0.00  0.00 ]
+Key: VPDPBSUDZ:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrm:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrmb:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrmbk:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrmk:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrmkz:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrr:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrrk:  [ 0.00  0.00 ]
+Key: VPDPBSUDZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBSUDrm:  [ 0.00  0.00 ]
+Key: VPDPBSUDrr:  [ 0.00  0.00 ]
+Key: VPDPBUSDSYrm:  [ 0.00  0.00 ]
+Key: VPDPBUSDSYrr:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZ:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrm:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrmb:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrmbk:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrmk:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrmkz:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrr:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrrk:  [ 0.00  0.00 ]
+Key: VPDPBUSDSZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBUSDSrm:  [ 0.00  0.00 ]
+Key: VPDPBUSDSrr:  [ 0.00  0.00 ]
+Key: VPDPBUSDYrm:  [ 0.00  0.00 ]
+Key: VPDPBUSDYrr:  [ 0.00  0.00 ]
+Key: VPDPBUSDZ:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrm:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrmb:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrmbk:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrmk:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrmkz:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrr:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrrk:  [ 0.00  0.00 ]
+Key: VPDPBUSDZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBUSDrm:  [ 0.00  0.00 ]
+Key: VPDPBUSDrr:  [ 0.00  0.00 ]
+Key: VPDPBUUDSYrm:  [ 0.00  0.00 ]
+Key: VPDPBUUDSYrr:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZ:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrm:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrmb:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrmbk:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrmk:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrmkz:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrr:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrrk:  [ 0.00  0.00 ]
+Key: VPDPBUUDSZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBUUDSrm:  [ 0.00  0.00 ]
+Key: VPDPBUUDSrr:  [ 0.00  0.00 ]
+Key: VPDPBUUDYrm:  [ 0.00  0.00 ]
+Key: VPDPBUUDYrr:  [ 0.00  0.00 ]
+Key: VPDPBUUDZ:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrm:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrmb:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrmbk:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrmk:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrmkz:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrr:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrrk:  [ 0.00  0.00 ]
+Key: VPDPBUUDZrrkz:  [ 0.00  0.00 ]
+Key: VPDPBUUDrm:  [ 0.00  0.00 ]
+Key: VPDPBUUDrr:  [ 0.00  0.00 ]
+Key: VPDPWSSDSYrm:  [ 0.00  0.00 ]
+Key: VPDPWSSDSYrr:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZ:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrm:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrmb:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrmbk:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrmk:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrmkz:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrr:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrrk:  [ 0.00  0.00 ]
+Key: VPDPWSSDSZrrkz:  [ 0.00  0.00 ]
+Key: VPDPWSSDSrm:  [ 0.00  0.00 ]
+Key: VPDPWSSDSrr:  [ 0.00  0.00 ]
+Key: VPDPWSSDYrm:  [ 0.00  0.00 ]
+Key: VPDPWSSDYrr:  [ 0.00  0.00 ]
+Key: VPDPWSSDZ:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrm:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrmb:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrmbk:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrmk:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrmkz:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrr:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrrk:  [ 0.00  0.00 ]
+Key: VPDPWSSDZrrkz:  [ 0.00  0.00 ]
+Key: VPDPWSSDrm:  [ 0.00  0.00 ]
+Key: VPDPWSSDrr:  [ 0.00  0.00 ]
+Key: VPDPWSUDSYrm:  [ 0.00  0.00 ]
+Key: VPDPWSUDSYrr:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZ:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrm:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrmb:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrmbk:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrmk:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrmkz:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrr:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrrk:  [ 0.00  0.00 ]
+Key: VPDPWSUDSZrrkz:  [ 0.00  0.00 ]
+Key: VPDPWSUDSrm:  [ 0.00  0.00 ]
+Key: VPDPWSUDSrr:  [ 0.00  0.00 ]
+Key: VPDPWSUDYrm:  [ 0.00  0.00 ]
+Key: VPDPWSUDYrr:  [ 0.00  0.00 ]
+Key: VPDPWSUDZ:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrm:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrmb:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrmbk:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrmk:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrmkz:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrr:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrrk:  [ 0.00  0.00 ]
+Key: VPDPWSUDZrrkz:  [ 0.00  0.00 ]
+Key: VPDPWSUDrm:  [ 0.00  0.00 ]
+Key: VPDPWSUDrr:  [ 0.00  0.00 ]
+Key: VPDPWUSDSYrm:  [ 0.00  0.00 ]
+Key: VPDPWUSDSYrr:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZ:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrm:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrmb:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrmbk:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrmk:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrmkz:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrr:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrrk:  [ 0.00  0.00 ]
+Key: VPDPWUSDSZrrkz:  [ 0.00  0.00 ]
+Key: VPDPWUSDSrm:  [ 0.00  0.00 ]
+Key: VPDPWUSDSrr:  [ 0.00  0.00 ]
+Key: VPDPWUSDYrm:  [ 0.00  0.00 ]
+Key: VPDPWUSDYrr:  [ 0.00  0.00 ]
+Key: VPDPWUSDZ:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrm:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrmb:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrmbk:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrmk:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrmkz:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrr:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrrk:  [ 0.00  0.00 ]
+Key: VPDPWUSDZrrkz:  [ 0.00  0.00 ]
+Key: VPDPWUSDrm:  [ 0.00  0.00 ]
+Key: VPDPWUSDrr:  [ 0.00  0.00 ]
+Key: VPDPWUUDSYrm:  [ 0.00  0.00 ]
+Key: VPDPWUUDSYrr:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZ:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrm:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrmb:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrmbk:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrmk:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrmkz:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrr:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrrk:  [ 0.00  0.00 ]
+Key: VPDPWUUDSZrrkz:  [ 0.00  0.00 ]
+Key: VPDPWUUDSrm:  [ 0.00  0.00 ]
+Key: VPDPWUUDSrr:  [ 0.00  0.00 ]
+Key: VPDPWUUDYrm:  [ 0.00  0.00 ]
+Key: VPDPWUUDYrr:  [ 0.00  0.00 ]
+Key: VPDPWUUDZ:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrm:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrmb:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrmbk:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrmbkz:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrmk:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrmkz:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrr:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrrk:  [ 0.00  0.00 ]
+Key: VPDPWUUDZrrkz:  [ 0.00  0.00 ]
+Key: VPDPWUUDrm:  [ 0.00  0.00 ]
+Key: VPDPWUUDrr:  [ 0.00  0.00 ]
+Key: VPERM:  [ 0.00  0.00 ]
+Key: VPERMBZ:  [ 0.00  0.00 ]
+Key: VPERMBZrm:  [ 0.00  0.00 ]
+Key: VPERMBZrmk:  [ 0.00  0.00 ]
+Key: VPERMBZrmkz:  [ 0.00  0.00 ]
+Key: VPERMBZrr:  [ 0.00  0.00 ]
+Key: VPERMBZrrk:  [ 0.00  0.00 ]
+Key: VPERMBZrrkz:  [ 0.00  0.00 ]
+Key: VPERMDYrm:  [ 0.00  0.00 ]
+Key: VPERMDYrr:  [ 0.00  0.00 ]
+Key: VPERMDZ:  [ 0.00  0.00 ]
+Key: VPERMDZrm:  [ 0.00  0.00 ]
+Key: VPERMDZrmb:  [ 0.00  0.00 ]
+Key: VPERMDZrmbk:  [ 0.00  0.00 ]
+Key: VPERMDZrmbkz:  [ 0.00  0.00 ]
+Key: VPERMDZrmk:  [ 0.00  0.00 ]
+Key: VPERMDZrmkz:  [ 0.00  0.00 ]
+Key: VPERMDZrr:  [ 0.00  0.00 ]
+Key: VPERMDZrrk:  [ 0.00  0.00 ]
+Key: VPERMDZrrkz:  [ 0.00  0.00 ]
+Key: VPERMI:  [ 0.00  0.00 ]
+Key: VPERMIL:  [ 0.00  0.00 ]
+Key: VPERMILPDYmi:  [ 0.00  0.00 ]
+Key: VPERMILPDYri:  [ 0.00  0.00 ]
+Key: VPERMILPDYrm:  [ 0.00  0.00 ]
+Key: VPERMILPDYrr:  [ 0.00  0.00 ]
+Key: VPERMILPDZ:  [ 0.00  0.00 ]
+Key: VPERMILPDZmbi:  [ 0.00  0.00 ]
+Key: VPERMILPDZmbik:  [ 0.00  0.00 ]
+Key: VPERMILPDZmbikz:  [ 0.00  0.00 ]
+Key: VPERMILPDZmi:  [ 0.00  0.00 ]
+Key: VPERMILPDZmik:  [ 0.00  0.00 ]
+Key: VPERMILPDZmikz:  [ 0.00  0.00 ]
+Key: VPERMILPDZri:  [ 0.00  0.00 ]
+Key: VPERMILPDZrik:  [ 0.00  0.00 ]
+Key: VPERMILPDZrikz:  [ 0.00  0.00 ]
+Key: VPERMILPDZrm:  [ 0.00  0.00 ]
+Key: VPERMILPDZrmb:  [ 0.00  0.00 ]
+Key: VPERMILPDZrmbk:  [ 0.00  0.00 ]
+Key: VPERMILPDZrmbkz:  [ 0.00  0.00 ]
+Key: VPERMILPDZrmk:  [ 0.00  0.00 ]
+Key: VPERMILPDZrmkz:  [ 0.00  0.00 ]
+Key: VPERMILPDZrr:  [ 0.00  0.00 ]
+Key: VPERMILPDZrrk:  [ 0.00  0.00 ]
+Key: VPERMILPDZrrkz:  [ 0.00  0.00 ]
+Key: VPERMILPDmi:  [ 0.00  0.00 ]
+Key: VPERMILPDri:  [ 0.00  0.00 ]
+Key: VPERMILPDrm:  [ 0.00  0.00 ]
+Key: VPERMILPDrr:  [ 0.00  0.00 ]
+Key: VPERMILPSYmi:  [ 0.00  0.00 ]
+Key: VPERMILPSYri:  [ 0.00  0.00 ]
+Key: VPERMILPSYrm:  [ 0.00  0.00 ]
+Key: VPERMILPSYrr:  [ 0.00  0.00 ]
+Key: VPERMILPSZ:  [ 0.00  0.00 ]
+Key: VPERMILPSZmbi:  [ 0.00  0.00 ]
+Key: VPERMILPSZmbik:  [ 0.00  0.00 ]
+Key: VPERMILPSZmbikz:  [ 0.00  0.00 ]
+Key: VPERMILPSZmi:  [ 0.00  0.00 ]
+Key: VPERMILPSZmik:  [ 0.00  0.00 ]
+Key: VPERMILPSZmikz:  [ 0.00  0.00 ]
+Key: VPERMILPSZri:  [ 0.00  0.00 ]
+Key: VPERMILPSZrik:  [ 0.00  0.00 ]
+Key: VPERMILPSZrikz:  [ 0.00  0.00 ]
+Key: VPERMILPSZrm:  [ 0.00  0.00 ]
+Key: VPERMILPSZrmb:  [ 0.00  0.00 ]
+Key: VPERMILPSZrmbk:  [ 0.00  0.00 ]
+Key: VPERMILPSZrmbkz:  [ 0.00  0.00 ]
+Key: VPERMILPSZrmk:  [ 0.00  0.00 ]
+Key: VPERMILPSZrmkz:  [ 0.00  0.00 ]
+Key: VPERMILPSZrr:  [ 0.00  0.00 ]
+Key: VPERMILPSZrrk:  [ 0.00  0.00 ]
+Key: VPERMILPSZrrkz:  [ 0.00  0.00 ]
+Key: VPERMILPSmi:  [ 0.00  0.00 ]
+Key: VPERMILPSri:  [ 0.00  0.00 ]
+Key: VPERMILPSrm:  [ 0.00  0.00 ]
+Key: VPERMILPSrr:  [ 0.00  0.00 ]
+Key: VPERMPDYmi:  [ 0.00  0.00 ]
+Key: VPERMPDYri:  [ 0.00  0.00 ]
+Key: VPERMPDZ:  [ 0.00  0.00 ]
+Key: VPERMPDZmbi:  [ 0.00  0.00 ]
+Key: VPERMPDZmbik:  [ 0.00  0.00 ]
+Key: VPERMPDZmbikz:  [ 0.00  0.00 ]
+Key: VPERMPDZmi:  [ 0.00  0.00 ]
+Key: VPERMPDZmik:  [ 0.00  0.00 ]
+Key: VPERMPDZmikz:  [ 0.00  0.00 ]
+Key: VPERMPDZri:  [ 0.00  0.00 ]
+Key: VPERMPDZrik:  [ 0.00  0.00 ]
+Key: VPERMPDZrikz:  [ 0.00  0.00 ]
+Key: VPERMPDZrm:  [ 0.00  0.00 ]
+Key: VPERMPDZrmb:  [ 0.00  0.00 ]
+Key: VPERMPDZrmbk:  [ 0.00  0.00 ]
+Key: VPERMPDZrmbkz:  [ 0.00  0.00 ]
+Key: VPERMPDZrmk:  [ 0.00  0.00 ]
+Key: VPERMPDZrmkz:  [ 0.00  0.00 ]
+Key: VPERMPDZrr:  [ 0.00  0.00 ]
+Key: VPERMPDZrrk:  [ 0.00  0.00 ]
+Key: VPERMPDZrrkz:  [ 0.00  0.00 ]
+Key: VPERMPSYrm:  [ 0.00  0.00 ]
+Key: VPERMPSYrr:  [ 0.00  0.00 ]
+Key: VPERMPSZ:  [ 0.00  0.00 ]
+Key: VPERMPSZrm:  [ 0.00  0.00 ]
+Key: VPERMPSZrmb:  [ 0.00  0.00 ]
+Key: VPERMPSZrmbk:  [ 0.00  0.00 ]
+Key: VPERMPSZrmbkz:  [ 0.00  0.00 ]
+Key: VPERMPSZrmk:  [ 0.00  0.00 ]
+Key: VPERMPSZrmkz:  [ 0.00  0.00 ]
+Key: VPERMPSZrr:  [ 0.00  0.00 ]
+Key: VPERMPSZrrk:  [ 0.00  0.00 ]
+Key: VPERMPSZrrkz:  [ 0.00  0.00 ]
+Key: VPERMQYmi:  [ 0.00  0.00 ]
+Key: VPERMQYri:  [ 0.00  0.00 ]
+Key: VPERMQZ:  [ 0.00  0.00 ]
+Key: VPERMQZmbi:  [ 0.00  0.00 ]
+Key: VPERMQZmbik:  [ 0.00  0.00 ]
+Key: VPERMQZmbikz:  [ 0.00  0.00 ]
+Key: VPERMQZmi:  [ 0.00  0.00 ]
+Key: VPERMQZmik:  [ 0.00  0.00 ]
+Key: VPERMQZmikz:  [ 0.00  0.00 ]
+Key: VPERMQZri:  [ 0.00  0.00 ]
+Key: VPERMQZrik:  [ 0.00  0.00 ]
+Key: VPERMQZrikz:  [ 0.00  0.00 ]
+Key: VPERMQZrm:  [ 0.00  0.00 ]
+Key: VPERMQZrmb:  [ 0.00  0.00 ]
+Key: VPERMQZrmbk:  [ 0.00  0.00 ]
+Key: VPERMQZrmbkz:  [ 0.00  0.00 ]
+Key: VPERMQZrmk:  [ 0.00  0.00 ]
+Key: VPERMQZrmkz:  [ 0.00  0.00 ]
+Key: VPERMQZrr:  [ 0.00  0.00 ]
+Key: VPERMQZrrk:  [ 0.00  0.00 ]
+Key: VPERMQZrrkz:  [ 0.00  0.00 ]
+Key: VPERMT:  [ 0.00  0.00 ]
+Key: VPERMWZ:  [ 0.00  0.00 ]
+Key: VPERMWZrm:  [ 0.00  0.00 ]
+Key: VPERMWZrmk:  [ 0.00  0.00 ]
+Key: VPERMWZrmkz:  [ 0.00  0.00 ]
+Key: VPERMWZrr:  [ 0.00  0.00 ]
+Key: VPERMWZrrk:  [ 0.00  0.00 ]
+Key: VPERMWZrrkz:  [ 0.00  0.00 ]
+Key: VPEXPANDBZ:  [ 0.00  0.00 ]
+Key: VPEXPANDBZrm:  [ 0.00  0.00 ]
+Key: VPEXPANDBZrmk:  [ 0.00  0.00 ]
+Key: VPEXPANDBZrmkz:  [ 0.00  0.00 ]
+Key: VPEXPANDBZrr:  [ 0.00  0.00 ]
+Key: VPEXPANDBZrrk:  [ 0.00  0.00 ]
+Key: VPEXPANDBZrrkz:  [ 0.00  0.00 ]
+Key: VPEXPANDDZ:  [ 0.00  0.00 ]
+Key: VPEXPANDDZrm:  [ 0.00  0.00 ]
+Key: VPEXPANDDZrmk:  [ 0.00  0.00 ]
+Key: VPEXPANDDZrmkz:  [ 0.00  0.00 ]
+Key: VPEXPANDDZrr:  [ 0.00  0.00 ]
+Key: VPEXPANDDZrrk:  [ 0.00  0.00 ]
+Key: VPEXPANDDZrrkz:  [ 0.00  0.00 ]
+Key: VPEXPANDQZ:  [ 0.00  0.00 ]
+Key: VPEXPANDQZrm:  [ 0.00  0.00 ]
+Key: VPEXPANDQZrmk:  [ 0.00  0.00 ]
+Key: VPEXPANDQZrmkz:  [ 0.00  0.00 ]
+Key: VPEXPANDQZrr:  [ 0.00  0.00 ]
+Key: VPEXPANDQZrrk:  [ 0.00  0.00 ]
+Key: VPEXPANDQZrrkz:  [ 0.00  0.00 ]
+Key: VPEXPANDWZ:  [ 0.00  0.00 ]
+Key: VPEXPANDWZrm:  [ 0.00  0.00 ]
+Key: VPEXPANDWZrmk:  [ 0.00  0.00 ]
+Key: VPEXPANDWZrmkz:  [ 0.00  0.00 ]
+Key: VPEXPANDWZrr:  [ 0.00  0.00 ]
+Key: VPEXPANDWZrrk:  [ 0.00  0.00 ]
+Key: VPEXPANDWZrrkz:  [ 0.00  0.00 ]
+Key: VPEXTRBZmri:  [ 0.00  0.00 ]
+Key: VPEXTRBZrri:  [ 0.00  0.00 ]
+Key: VPEXTRBmri:  [ 0.00  0.00 ]
+Key: VPEXTRBrri:  [ 0.00  0.00 ]
+Key: VPEXTRDZmri:  [ 0.00  0.00 ]
+Key: VPEXTRDZrri:  [ 0.00  0.00 ]
+Key: VPEXTRDmri:  [ 0.00  0.00 ]
+Key: VPEXTRDrri:  [ 0.00  0.00 ]
+Key: VPEXTRQZmri:  [ 0.00  0.00 ]
+Key: VPEXTRQZrri:  [ 0.00  0.00 ]
+Key: VPEXTRQmri:  [ 0.00  0.00 ]
+Key: VPEXTRQrri:  [ 0.00  0.00 ]
+Key: VPEXTRWZmri:  [ 0.00  0.00 ]
+Key: VPEXTRWZrri:  [ 0.00  0.00 ]
+Key: VPEXTRWZrri_REV:  [ 0.00  0.00 ]
+Key: VPEXTRWmri:  [ 0.00  0.00 ]
+Key: VPEXTRWrri:  [ 0.00  0.00 ]
+Key: VPEXTRWrri_REV:  [ 0.00  0.00 ]
+Key: VPGATHERDDYrm:  [ 0.00  0.00 ]
+Key: VPGATHERDDZ:  [ 0.00  0.00 ]
+Key: VPGATHERDDZrm:  [ 0.00  0.00 ]
+Key: VPGATHERDDrm:  [ 0.00  0.00 ]
+Key: VPGATHERDQYrm:  [ 0.00  0.00 ]
+Key: VPGATHERDQZ:  [ 0.00  0.00 ]
+Key: VPGATHERDQZrm:  [ 0.00  0.00 ]
+Key: VPGATHERDQrm:  [ 0.00  0.00 ]
+Key: VPGATHERQDYrm:  [ 0.00  0.00 ]
+Key: VPGATHERQDZ:  [ 0.00  0.00 ]
+Key: VPGATHERQDZrm:  [ 0.00  0.00 ]
+Key: VPGATHERQDrm:  [ 0.00  0.00 ]
+Key: VPGATHERQQYrm:  [ 0.00  0.00 ]
+Key: VPGATHERQQZ:  [ 0.00  0.00 ]
+Key: VPGATHERQQZrm:  [ 0.00  0.00 ]
+Key: VPGATHERQQrm:  [ 0.00  0.00 ]
+Key: VPHADDBDrm:  [ 0.00  0.00 ]
+Key: VPHADDBDrr:  [ 0.00  0.00 ]
+Key: VPHADDBQrm:  [ 0.00  0.00 ]
+Key: VPHADDBQrr:  [ 0.00  0.00 ]
+Key: VPHADDBWrm:  [ 0.00  0.00 ]
+Key: VPHADDBWrr:  [ 0.00  0.00 ]
+Key: VPHADDDQrm:  [ 0.00  0.00 ]
+Key: VPHADDDQrr:  [ 0.00  0.00 ]
+Key: VPHADDDYrm:  [ 0.00  0.00 ]
+Key: VPHADDDYrr:  [ 0.00  0.00 ]
+Key: VPHADDDrm:  [ 0.00  0.00 ]
+Key: VPHADDDrr:  [ 0.00  0.00 ]
+Key: VPHADDSWYrm:  [ 0.00  0.00 ]
+Key: VPHADDSWYrr:  [ 0.00  0.00 ]
+Key: VPHADDSWrm:  [ 0.00  0.00 ]
+Key: VPHADDSWrr:  [ 0.00  0.00 ]
+Key: VPHADDUBDrm:  [ 0.00  0.00 ]
+Key: VPHADDUBDrr:  [ 0.00  0.00 ]
+Key: VPHADDUBQrm:  [ 0.00  0.00 ]
+Key: VPHADDUBQrr:  [ 0.00  0.00 ]
+Key: VPHADDUBWrm:  [ 0.00  0.00 ]
+Key: VPHADDUBWrr:  [ 0.00  0.00 ]
+Key: VPHADDUDQrm:  [ 0.00  0.00 ]
+Key: VPHADDUDQrr:  [ 0.00  0.00 ]
+Key: VPHADDUWDrm:  [ 0.00  0.00 ]
+Key: VPHADDUWDrr:  [ 0.00  0.00 ]
+Key: VPHADDUWQrm:  [ 0.00  0.00 ]
+Key: VPHADDUWQrr:  [ 0.00  0.00 ]
+Key: VPHADDWDrm:  [ 0.00  0.00 ]
+Key: VPHADDWDrr:  [ 0.00  0.00 ]
+Key: VPHADDWQrm:  [ 0.00  0.00 ]
+Key: VPHADDWQrr:  [ 0.00  0.00 ]
+Key: VPHADDWYrm:  [ 0.00  0.00 ]
+Key: VPHADDWYrr:  [ 0.00  0.00 ]
+Key: VPHADDWrm:  [ 0.00  0.00 ]
+Key: VPHADDWrr:  [ 0.00  0.00 ]
+Key: VPHMINPOSUWrm:  [ 0.00  0.00 ]
+Key: VPHMINPOSUWrr:  [ 0.00  0.00 ]
+Key: VPHSUBBWrm:  [ 0.00  0.00 ]
+Key: VPHSUBBWrr:  [ 0.00  0.00 ]
+Key: VPHSUBDQrm:  [ 0.00  0.00 ]
+Key: VPHSUBDQrr:  [ 0.00  0.00 ]
+Key: VPHSUBDYrm:  [ 0.00  0.00 ]
+Key: VPHSUBDYrr:  [ 0.00  0.00 ]
+Key: VPHSUBDrm:  [ 0.00  0.00 ]
+Key: VPHSUBDrr:  [ 0.00  0.00 ]
+Key: VPHSUBSWYrm:  [ 0.00  0.00 ]
+Key: VPHSUBSWYrr:  [ 0.00  0.00 ]
+Key: VPHSUBSWrm:  [ 0.00  0.00 ]
+Key: VPHSUBSWrr:  [ 0.00  0.00 ]
+Key: VPHSUBWDrm:  [ 0.00  0.00 ]
+Key: VPHSUBWDrr:  [ 0.00  0.00 ]
+Key: VPHSUBWYrm:  [ 0.00  0.00 ]
+Key: VPHSUBWYrr:  [ 0.00  0.00 ]
+Key: VPHSUBWrm:  [ 0.00  0.00 ]
+Key: VPHSUBWrr:  [ 0.00  0.00 ]
+Key: VPINSRBZrmi:  [ 0.00  0.00 ]
+Key: VPINSRBZrri:  [ 0.00  0.00 ]
+Key: VPINSRBrmi:  [ 0.00  0.00 ]
+Key: VPINSRBrri:  [ 0.00  0.00 ]
+Key: VPINSRDZrmi:  [ 0.00  0.00 ]
+Key: VPINSRDZrri:  [ 0.00  0.00 ]
+Key: VPINSRDrmi:  [ 0.00  0.00 ]
+Key: VPINSRDrri:  [ 0.00  0.00 ]
+Key: VPINSRQZrmi:  [ 0.00  0.00 ]
+Key: VPINSRQZrri:  [ 0.00  0.00 ]
+Key: VPINSRQrmi:  [ 0.00  0.00 ]
+Key: VPINSRQrri:  [ 0.00  0.00 ]
+Key: VPINSRWZrmi:  [ 0.00  0.00 ]
+Key: VPINSRWZrri:  [ 0.00  0.00 ]
+Key: VPINSRWrmi:  [ 0.00  0.00 ]
+Key: VPINSRWrri:  [ 0.00  0.00 ]
+Key: VPLZCNTDZ:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrm:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrmb:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrmbk:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrmbkz:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrmk:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrmkz:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrr:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrrk:  [ 0.00  0.00 ]
+Key: VPLZCNTDZrrkz:  [ 0.00  0.00 ]
+Key: VPLZCNTQZ:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrm:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrmb:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrmbk:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrmbkz:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrmk:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrmkz:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrr:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrrk:  [ 0.00  0.00 ]
+Key: VPLZCNTQZrrkz:  [ 0.00  0.00 ]
+Key: VPMACSDDrm:  [ 0.00  0.00 ]
+Key: VPMACSDDrr:  [ 0.00  0.00 ]
+Key: VPMACSDQHrm:  [ 0.00  0.00 ]
+Key: VPMACSDQHrr:  [ 0.00  0.00 ]
+Key: VPMACSDQLrm:  [ 0.00  0.00 ]
+Key: VPMACSDQLrr:  [ 0.00  0.00 ]
+Key: VPMACSSDDrm:  [ 0.00  0.00 ]
+Key: VPMACSSDDrr:  [ 0.00  0.00 ]
+Key: VPMACSSDQHrm:  [ 0.00  0.00 ]
+Key: VPMACSSDQHrr:  [ 0.00  0.00 ]
+Key: VPMACSSDQLrm:  [ 0.00  0.00 ]
+Key: VPMACSSDQLrr:  [ 0.00  0.00 ]
+Key: VPMACSSWDrm:  [ 0.00  0.00 ]
+Key: VPMACSSWDrr:  [ 0.00  0.00 ]
+Key: VPMACSSWWrm:  [ 0.00  0.00 ]
+Key: VPMACSSWWrr:  [ 0.00  0.00 ]
+Key: VPMACSWDrm:  [ 0.00  0.00 ]
+Key: VPMACSWDrr:  [ 0.00  0.00 ]
+Key: VPMACSWWrm:  [ 0.00  0.00 ]
+Key: VPMACSWWrr:  [ 0.00  0.00 ]
+Key: VPMADCSSWDrm:  [ 0.00  0.00 ]
+Key: VPMADCSSWDrr:  [ 0.00  0.00 ]
+Key: VPMADCSWDrm:  [ 0.00  0.00 ]
+Key: VPMADCSWDrr:  [ 0.00  0.00 ]
+Key: VPMADD:  [ 0.00  0.00 ]
+Key: VPMADDUBSWYrm:  [ 0.00  0.00 ]
+Key: VPMADDUBSWYrr:  [ 0.00  0.00 ]
+Key: VPMADDUBSWZ:  [ 0.00  0.00 ]
+Key: VPMADDUBSWZrm:  [ 0.00  0.00 ]
+Key: VPMADDUBSWZrmk:  [ 0.00  0.00 ]
+Key: VPMADDUBSWZrmkz:  [ 0.00  0.00 ]
+Key: VPMADDUBSWZrr:  [ 0.00  0.00 ]
+Key: VPMADDUBSWZrrk:  [ 0.00  0.00 ]
+Key: VPMADDUBSWZrrkz:  [ 0.00  0.00 ]
+Key: VPMADDUBSWrm:  [ 0.00  0.00 ]
+Key: VPMADDUBSWrr:  [ 0.00  0.00 ]
+Key: VPMADDWDYrm:  [ 0.00  0.00 ]
+Key: VPMADDWDYrr:  [ 0.00  0.00 ]
+Key: VPMADDWDZ:  [ 0.00  0.00 ]
+Key: VPMADDWDZrm:  [ 0.00  0.00 ]
+Key: VPMADDWDZrmk:  [ 0.00  0.00 ]
+Key: VPMADDWDZrmkz:  [ 0.00  0.00 ]
+Key: VPMADDWDZrr:  [ 0.00  0.00 ]
+Key: VPMADDWDZrrk:  [ 0.00  0.00 ]
+Key: VPMADDWDZrrkz:  [ 0.00  0.00 ]
+Key: VPMADDWDrm:  [ 0.00  0.00 ]
+Key: VPMADDWDrr:  [ 0.00  0.00 ]
+Key: VPMASKMOVDYmr:  [ 0.00  0.00 ]
+Key: VPMASKMOVDYrm:  [ 0.00  0.00 ]
+Key: VPMASKMOVDmr:  [ 0.00  0.00 ]
+Key: VPMASKMOVDrm:  [ 0.00  0.00 ]
+Key: VPMASKMOVQYmr:  [ 0.00  0.00 ]
+Key: VPMASKMOVQYrm:  [ 0.00  0.00 ]
+Key: VPMASKMOVQmr:  [ 0.00  0.00 ]
+Key: VPMASKMOVQrm:  [ 0.00  0.00 ]
+Key: VPMAXSBYrm:  [ 0.00  0.00 ]
+Key: VPMAXSBYrr:  [ 0.00  0.00 ]
+Key: VPMAXSBZ:  [ 0.00  0.00 ]
+Key: VPMAXSBZrm:  [ 0.00  0.00 ]
+Key: VPMAXSBZrmk:  [ 0.00  0.00 ]
+Key: VPMAXSBZrmkz:  [ 0.00  0.00 ]
+Key: VPMAXSBZrr:  [ 0.00  0.00 ]
+Key: VPMAXSBZrrk:  [ 0.00  0.00 ]
+Key: VPMAXSBZrrkz:  [ 0.00  0.00 ]
+Key: VPMAXSBrm:  [ 0.00  0.00 ]
+Key: VPMAXSBrr:  [ 0.00  0.00 ]
+Key: VPMAXSDYrm:  [ 0.00  0.00 ]
+Key: VPMAXSDYrr:  [ 0.00  0.00 ]
+Key: VPMAXSDZ:  [ 0.00  0.00 ]
+Key: VPMAXSDZrm:  [ 0.00  0.00 ]
+Key: VPMAXSDZrmb:  [ 0.00  0.00 ]
+Key: VPMAXSDZrmbk:  [ 0.00  0.00 ]
+Key: VPMAXSDZrmbkz:  [ 0.00  0.00 ]
+Key: VPMAXSDZrmk:  [ 0.00  0.00 ]
+Key: VPMAXSDZrmkz:  [ 0.00  0.00 ]
+Key: VPMAXSDZrr:  [ 0.00  0.00 ]
+Key: VPMAXSDZrrk:  [ 0.00  0.00 ]
+Key: VPMAXSDZrrkz:  [ 0.00  0.00 ]
+Key: VPMAXSDrm:  [ 0.00  0.00 ]
+Key: VPMAXSDrr:  [ 0.00  0.00 ]
+Key: VPMAXSQZ:  [ 0.00  0.00 ]
+Key: VPMAXSQZrm:  [ 0.00  0.00 ]
+Key: VPMAXSQZrmb:  [ 0.00  0.00 ]
+Key: VPMAXSQZrmbk:  [ 0.00  0.00 ]
+Key: VPMAXSQZrmbkz:  [ 0.00  0.00 ]
+Key: VPMAXSQZrmk:  [ 0.00  0.00 ]
+Key: VPMAXSQZrmkz:  [ 0.00  0.00 ]
+Key: VPMAXSQZrr:  [ 0.00  0.00 ]
+Key: VPMAXSQZrrk:  [ 0.00  0.00 ]
+Key: VPMAXSQZrrkz:  [ 0.00  0.00 ]
+Key: VPMAXSWYrm:  [ 0.00  0.00 ]
+Key: VPMAXSWYrr:  [ 0.00  0.00 ]
+Key: VPMAXSWZ:  [ 0.00  0.00 ]
+Key: VPMAXSWZrm:  [ 0.00  0.00 ]
+Key: VPMAXSWZrmk:  [ 0.00  0.00 ]
+Key: VPMAXSWZrmkz:  [ 0.00  0.00 ]
+Key: VPMAXSWZrr:  [ 0.00  0.00 ]
+Key: VPMAXSWZrrk:  [ 0.00  0.00 ]
+Key: VPMAXSWZrrkz:  [ 0.00  0.00 ]
+Key: VPMAXSWrm:  [ 0.00  0.00 ]
+Key: VPMAXSWrr:  [ 0.00  0.00 ]
+Key: VPMAXUBYrm:  [ 0.00  0.00 ]
+Key: VPMAXUBYrr:  [ 0.00  0.00 ]
+Key: VPMAXUBZ:  [ 0.00  0.00 ]
+Key: VPMAXUBZrm:  [ 0.00  0.00 ]
+Key: VPMAXUBZrmk:  [ 0.00  0.00 ]
+Key: VPMAXUBZrmkz:  [ 0.00  0.00 ]
+Key: VPMAXUBZrr:  [ 0.00  0.00 ]
+Key: VPMAXUBZrrk:  [ 0.00  0.00 ]
+Key: VPMAXUBZrrkz:  [ 0.00  0.00 ]
+Key: VPMAXUBrm:  [ 0.00  0.00 ]
+Key: VPMAXUBrr:  [ 0.00  0.00 ]
+Key: VPMAXUDYrm:  [ 0.00  0.00 ]
+Key: VPMAXUDYrr:  [ 0.00  0.00 ]
+Key: VPMAXUDZ:  [ 0.00  0.00 ]
+Key: VPMAXUDZrm:  [ 0.00  0.00 ]
+Key: VPMAXUDZrmb:  [ 0.00  0.00 ]
+Key: VPMAXUDZrmbk:  [ 0.00  0.00 ]
+Key: VPMAXUDZrmbkz:  [ 0.00  0.00 ]
+Key: VPMAXUDZrmk:  [ 0.00  0.00 ]
+Key: VPMAXUDZrmkz:  [ 0.00  0.00 ]
+Key: VPMAXUDZrr:  [ 0.00  0.00 ]
+Key: VPMAXUDZrrk:  [ 0.00  0.00 ]
+Key: VPMAXUDZrrkz:  [ 0.00  0.00 ]
+Key: VPMAXUDrm:  [ 0.00  0.00 ]
+Key: VPMAXUDrr:  [ 0.00  0.00 ]
+Key: VPMAXUQZ:  [ 0.00  0.00 ]
+Key: VPMAXUQZrm:  [ 0.00  0.00 ]
+Key: VPMAXUQZrmb:  [ 0.00  0.00 ]
+Key: VPMAXUQZrmbk:  [ 0.00  0.00 ]
+Key: VPMAXUQZrmbkz:  [ 0.00  0.00 ]
+Key: VPMAXUQZrmk:  [ 0.00  0.00 ]
+Key: VPMAXUQZrmkz:  [ 0.00  0.00 ]
+Key: VPMAXUQZrr:  [ 0.00  0.00 ]
+Key: VPMAXUQZrrk:  [ 0.00  0.00 ]
+Key: VPMAXUQZrrkz:  [ 0.00  0.00 ]
+Key: VPMAXUWYrm:  [ 0.00  0.00 ]
+Key: VPMAXUWYrr:  [ 0.00  0.00 ]
+Key: VPMAXUWZ:  [ 0.00  0.00 ]
+Key: VPMAXUWZrm:  [ 0.00  0.00 ]
+Key: VPMAXUWZrmk:  [ 0.00  0.00 ]
+Key: VPMAXUWZrmkz:  [ 0.00  0.00 ]
+Key: VPMAXUWZrr:  [ 0.00  0.00 ]
+Key: VPMAXUWZrrk:  [ 0.00  0.00 ]
+Key: VPMAXUWZrrkz:  [ 0.00  0.00 ]
+Key: VPMAXUWrm:  [ 0.00  0.00 ]
+Key: VPMAXUWrr:  [ 0.00  0.00 ]
+Key: VPMINSBYrm:  [ 0.00  0.00 ]
+Key: VPMINSBYrr:  [ 0.00  0.00 ]
+Key: VPMINSBZ:  [ 0.00  0.00 ]
+Key: VPMINSBZrm:  [ 0.00  0.00 ]
+Key: VPMINSBZrmk:  [ 0.00  0.00 ]
+Key: VPMINSBZrmkz:  [ 0.00  0.00 ]
+Key: VPMINSBZrr:  [ 0.00  0.00 ]
+Key: VPMINSBZrrk:  [ 0.00  0.00 ]
+Key: VPMINSBZrrkz:  [ 0.00  0.00 ]
+Key: VPMINSBrm:  [ 0.00  0.00 ]
+Key: VPMINSBrr:  [ 0.00  0.00 ]
+Key: VPMINSDYrm:  [ 0.00  0.00 ]
+Key: VPMINSDYrr:  [ 0.00  0.00 ]
+Key: VPMINSDZ:  [ 0.00  0.00 ]
+Key: VPMINSDZrm:  [ 0.00  0.00 ]
+Key: VPMINSDZrmb:  [ 0.00  0.00 ]
+Key: VPMINSDZrmbk:  [ 0.00  0.00 ]
+Key: VPMINSDZrmbkz:  [ 0.00  0.00 ]
+Key: VPMINSDZrmk:  [ 0.00  0.00 ]
+Key: VPMINSDZrmkz:  [ 0.00  0.00 ]
+Key: VPMINSDZrr:  [ 0.00  0.00 ]
+Key: VPMINSDZrrk:  [ 0.00  0.00 ]
+Key: VPMINSDZrrkz:  [ 0.00  0.00 ]
+Key: VPMINSDrm:  [ 0.00  0.00 ]
+Key: VPMINSDrr:  [ 0.00  0.00 ]
+Key: VPMINSQZ:  [ 0.00  0.00 ]
+Key: VPMINSQZrm:  [ 0.00  0.00 ]
+Key: VPMINSQZrmb:  [ 0.00  0.00 ]
+Key: VPMINSQZrmbk:  [ 0.00  0.00 ]
+Key: VPMINSQZrmbkz:  [ 0.00  0.00 ]
+Key: VPMINSQZrmk:  [ 0.00  0.00 ]
+Key: VPMINSQZrmkz:  [ 0.00  0.00 ]
+Key: VPMINSQZrr:  [ 0.00  0.00 ]
+Key: VPMINSQZrrk:  [ 0.00  0.00 ]
+Key: VPMINSQZrrkz:  [ 0.00  0.00 ]
+Key: VPMINSWYrm:  [ 0.00  0.00 ]
+Key: VPMINSWYrr:  [ 0.00  0.00 ]
+Key: VPMINSWZ:  [ 0.00  0.00 ]
+Key: VPMINSWZrm:  [ 0.00  0.00 ]
+Key: VPMINSWZrmk:  [ 0.00  0.00 ]
+Key: VPMINSWZrmkz:  [ 0.00  0.00 ]
+Key: VPMINSWZrr:  [ 0.00  0.00 ]
+Key: VPMINSWZrrk:  [ 0.00  0.00 ]
+Key: VPMINSWZrrkz:  [ 0.00  0.00 ]
+Key: VPMINSWrm:  [ 0.00  0.00 ]
+Key: VPMINSWrr:  [ 0.00  0.00 ]
+Key: VPMINUBYrm:  [ 0.00  0.00 ]
+Key: VPMINUBYrr:  [ 0.00  0.00 ]
+Key: VPMINUBZ:  [ 0.00  0.00 ]
+Key: VPMINUBZrm:  [ 0.00  0.00 ]
+Key: VPMINUBZrmk:  [ 0.00  0.00 ]
+Key: VPMINUBZrmkz:  [ 0.00  0.00 ]
+Key: VPMINUBZrr:  [ 0.00  0.00 ]
+Key: VPMINUBZrrk:  [ 0.00  0.00 ]
+Key: VPMINUBZrrkz:  [ 0.00  0.00 ]
+Key: VPMINUBrm:  [ 0.00  0.00 ]
+Key: VPMINUBrr:  [ 0.00  0.00 ]
+Key: VPMINUDYrm:  [ 0.00  0.00 ]
+Key: VPMINUDYrr:  [ 0.00  0.00 ]
+Key: VPMINUDZ:  [ 0.00  0.00 ]
+Key: VPMINUDZrm:  [ 0.00  0.00 ]
+Key: VPMINUDZrmb:  [ 0.00  0.00 ]
+Key: VPMINUDZrmbk:  [ 0.00  0.00 ]
+Key: VPMINUDZrmbkz:  [ 0.00  0.00 ]
+Key: VPMINUDZrmk:  [ 0.00  0.00 ]
+Key: VPMINUDZrmkz:  [ 0.00  0.00 ]
+Key: VPMINUDZrr:  [ 0.00  0.00 ]
+Key: VPMINUDZrrk:  [ 0.00  0.00 ]
+Key: VPMINUDZrrkz:  [ 0.00  0.00 ]
+Key: VPMINUDrm:  [ 0.00  0.00 ]
+Key: VPMINUDrr:  [ 0.00  0.00 ]
+Key: VPMINUQZ:  [ 0.00  0.00 ]
+Key: VPMINUQZrm:  [ 0.00  0.00 ]
+Key: VPMINUQZrmb:  [ 0.00  0.00 ]
+Key: VPMINUQZrmbk:  [ 0.00  0.00 ]
+Key: VPMINUQZrmbkz:  [ 0.00  0.00 ]
+Key: VPMINUQZrmk:  [ 0.00  0.00 ]
+Key: VPMINUQZrmkz:  [ 0.00  0.00 ]
+Key: VPMINUQZrr:  [ 0.00  0.00 ]
+Key: VPMINUQZrrk:  [ 0.00  0.00 ]
+Key: VPMINUQZrrkz:  [ 0.00  0.00 ]
+Key: VPMINUWYrm:  [ 0.00  0.00 ]
+Key: VPMINUWYrr:  [ 0.00  0.00 ]
+Key: VPMINUWZ:  [ 0.00  0.00 ]
+Key: VPMINUWZrm:  [ 0.00  0.00 ]
+Key: VPMINUWZrmk:  [ 0.00  0.00 ]
+Key: VPMINUWZrmkz:  [ 0.00  0.00 ]
+Key: VPMINUWZrr:  [ 0.00  0.00 ]
+Key: VPMINUWZrrk:  [ 0.00  0.00 ]
+Key: VPMINUWZrrkz:  [ 0.00  0.00 ]
+Key: VPMINUWrm:  [ 0.00  0.00 ]
+Key: VPMINUWrr:  [ 0.00  0.00 ]
+Key: VPMOVB:  [ 0.00  0.00 ]
+Key: VPMOVD:  [ 0.00  0.00 ]
+Key: VPMOVDBZ:  [ 0.00  0.00 ]
+Key: VPMOVDBZmr:  [ 0.00  0.00 ]
+Key: VPMOVDBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVDBZrr:  [ 0.00  0.00 ]
+Key: VPMOVDBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVDBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVDWZ:  [ 0.00  0.00 ]
+Key: VPMOVDWZmr:  [ 0.00  0.00 ]
+Key: VPMOVDWZmrk:  [ 0.00  0.00 ]
+Key: VPMOVDWZrr:  [ 0.00  0.00 ]
+Key: VPMOVDWZrrk:  [ 0.00  0.00 ]
+Key: VPMOVDWZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVM:  [ 0.00  0.00 ]
+Key: VPMOVMSKBYrr:  [ 0.00  0.00 ]
+Key: VPMOVMSKBrr:  [ 0.00  0.00 ]
+Key: VPMOVQ:  [ 0.00  0.00 ]
+Key: VPMOVQBZ:  [ 0.00  0.00 ]
+Key: VPMOVQBZmr:  [ 0.00  0.00 ]
+Key: VPMOVQBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVQBZrr:  [ 0.00  0.00 ]
+Key: VPMOVQBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVQBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVQDZ:  [ 0.00  0.00 ]
+Key: VPMOVQDZmr:  [ 0.00  0.00 ]
+Key: VPMOVQDZmrk:  [ 0.00  0.00 ]
+Key: VPMOVQDZrr:  [ 0.00  0.00 ]
+Key: VPMOVQDZrrk:  [ 0.00  0.00 ]
+Key: VPMOVQDZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVQWZ:  [ 0.00  0.00 ]
+Key: VPMOVQWZmr:  [ 0.00  0.00 ]
+Key: VPMOVQWZmrk:  [ 0.00  0.00 ]
+Key: VPMOVQWZrr:  [ 0.00  0.00 ]
+Key: VPMOVQWZrrk:  [ 0.00  0.00 ]
+Key: VPMOVQWZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSDBZ:  [ 0.00  0.00 ]
+Key: VPMOVSDBZmr:  [ 0.00  0.00 ]
+Key: VPMOVSDBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVSDBZrr:  [ 0.00  0.00 ]
+Key: VPMOVSDBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSDBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSDWZ:  [ 0.00  0.00 ]
+Key: VPMOVSDWZmr:  [ 0.00  0.00 ]
+Key: VPMOVSDWZmrk:  [ 0.00  0.00 ]
+Key: VPMOVSDWZrr:  [ 0.00  0.00 ]
+Key: VPMOVSDWZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSDWZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSQBZ:  [ 0.00  0.00 ]
+Key: VPMOVSQBZmr:  [ 0.00  0.00 ]
+Key: VPMOVSQBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVSQBZrr:  [ 0.00  0.00 ]
+Key: VPMOVSQBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSQBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSQDZ:  [ 0.00  0.00 ]
+Key: VPMOVSQDZmr:  [ 0.00  0.00 ]
+Key: VPMOVSQDZmrk:  [ 0.00  0.00 ]
+Key: VPMOVSQDZrr:  [ 0.00  0.00 ]
+Key: VPMOVSQDZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSQDZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSQWZ:  [ 0.00  0.00 ]
+Key: VPMOVSQWZmr:  [ 0.00  0.00 ]
+Key: VPMOVSQWZmrk:  [ 0.00  0.00 ]
+Key: VPMOVSQWZrr:  [ 0.00  0.00 ]
+Key: VPMOVSQWZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSQWZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSWBZ:  [ 0.00  0.00 ]
+Key: VPMOVSWBZmr:  [ 0.00  0.00 ]
+Key: VPMOVSWBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVSWBZrr:  [ 0.00  0.00 ]
+Key: VPMOVSWBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSWBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSXBDYrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBDYrr:  [ 0.00  0.00 ]
+Key: VPMOVSXBDZ:  [ 0.00  0.00 ]
+Key: VPMOVSXBDZrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBDZrmk:  [ 0.00  0.00 ]
+Key: VPMOVSXBDZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVSXBDZrr:  [ 0.00  0.00 ]
+Key: VPMOVSXBDZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSXBDZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSXBDrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBDrr:  [ 0.00  0.00 ]
+Key: VPMOVSXBQYrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBQYrr:  [ 0.00  0.00 ]
+Key: VPMOVSXBQZ:  [ 0.00  0.00 ]
+Key: VPMOVSXBQZrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBQZrmk:  [ 0.00  0.00 ]
+Key: VPMOVSXBQZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVSXBQZrr:  [ 0.00  0.00 ]
+Key: VPMOVSXBQZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSXBQZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSXBQrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBQrr:  [ 0.00  0.00 ]
+Key: VPMOVSXBWYrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBWYrr:  [ 0.00  0.00 ]
+Key: VPMOVSXBWZ:  [ 0.00  0.00 ]
+Key: VPMOVSXBWZrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBWZrmk:  [ 0.00  0.00 ]
+Key: VPMOVSXBWZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVSXBWZrr:  [ 0.00  0.00 ]
+Key: VPMOVSXBWZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSXBWZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSXBWrm:  [ 0.00  0.00 ]
+Key: VPMOVSXBWrr:  [ 0.00  0.00 ]
+Key: VPMOVSXDQYrm:  [ 0.00  0.00 ]
+Key: VPMOVSXDQYrr:  [ 0.00  0.00 ]
+Key: VPMOVSXDQZ:  [ 0.00  0.00 ]
+Key: VPMOVSXDQZrm:  [ 0.00  0.00 ]
+Key: VPMOVSXDQZrmk:  [ 0.00  0.00 ]
+Key: VPMOVSXDQZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVSXDQZrr:  [ 0.00  0.00 ]
+Key: VPMOVSXDQZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSXDQZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSXDQrm:  [ 0.00  0.00 ]
+Key: VPMOVSXDQrr:  [ 0.00  0.00 ]
+Key: VPMOVSXWDYrm:  [ 0.00  0.00 ]
+Key: VPMOVSXWDYrr:  [ 0.00  0.00 ]
+Key: VPMOVSXWDZ:  [ 0.00  0.00 ]
+Key: VPMOVSXWDZrm:  [ 0.00  0.00 ]
+Key: VPMOVSXWDZrmk:  [ 0.00  0.00 ]
+Key: VPMOVSXWDZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVSXWDZrr:  [ 0.00  0.00 ]
+Key: VPMOVSXWDZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSXWDZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSXWDrm:  [ 0.00  0.00 ]
+Key: VPMOVSXWDrr:  [ 0.00  0.00 ]
+Key: VPMOVSXWQYrm:  [ 0.00  0.00 ]
+Key: VPMOVSXWQYrr:  [ 0.00  0.00 ]
+Key: VPMOVSXWQZ:  [ 0.00  0.00 ]
+Key: VPMOVSXWQZrm:  [ 0.00  0.00 ]
+Key: VPMOVSXWQZrmk:  [ 0.00  0.00 ]
+Key: VPMOVSXWQZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVSXWQZrr:  [ 0.00  0.00 ]
+Key: VPMOVSXWQZrrk:  [ 0.00  0.00 ]
+Key: VPMOVSXWQZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVSXWQrm:  [ 0.00  0.00 ]
+Key: VPMOVSXWQrr:  [ 0.00  0.00 ]
+Key: VPMOVUSDBZ:  [ 0.00  0.00 ]
+Key: VPMOVUSDBZmr:  [ 0.00  0.00 ]
+Key: VPMOVUSDBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVUSDBZrr:  [ 0.00  0.00 ]
+Key: VPMOVUSDBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVUSDBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVUSDWZ:  [ 0.00  0.00 ]
+Key: VPMOVUSDWZmr:  [ 0.00  0.00 ]
+Key: VPMOVUSDWZmrk:  [ 0.00  0.00 ]
+Key: VPMOVUSDWZrr:  [ 0.00  0.00 ]
+Key: VPMOVUSDWZrrk:  [ 0.00  0.00 ]
+Key: VPMOVUSDWZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVUSQBZ:  [ 0.00  0.00 ]
+Key: VPMOVUSQBZmr:  [ 0.00  0.00 ]
+Key: VPMOVUSQBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVUSQBZrr:  [ 0.00  0.00 ]
+Key: VPMOVUSQBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVUSQBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVUSQDZ:  [ 0.00  0.00 ]
+Key: VPMOVUSQDZmr:  [ 0.00  0.00 ]
+Key: VPMOVUSQDZmrk:  [ 0.00  0.00 ]
+Key: VPMOVUSQDZrr:  [ 0.00  0.00 ]
+Key: VPMOVUSQDZrrk:  [ 0.00  0.00 ]
+Key: VPMOVUSQDZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVUSQWZ:  [ 0.00  0.00 ]
+Key: VPMOVUSQWZmr:  [ 0.00  0.00 ]
+Key: VPMOVUSQWZmrk:  [ 0.00  0.00 ]
+Key: VPMOVUSQWZrr:  [ 0.00  0.00 ]
+Key: VPMOVUSQWZrrk:  [ 0.00  0.00 ]
+Key: VPMOVUSQWZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVUSWBZ:  [ 0.00  0.00 ]
+Key: VPMOVUSWBZmr:  [ 0.00  0.00 ]
+Key: VPMOVUSWBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVUSWBZrr:  [ 0.00  0.00 ]
+Key: VPMOVUSWBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVUSWBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVW:  [ 0.00  0.00 ]
+Key: VPMOVWBZ:  [ 0.00  0.00 ]
+Key: VPMOVWBZmr:  [ 0.00  0.00 ]
+Key: VPMOVWBZmrk:  [ 0.00  0.00 ]
+Key: VPMOVWBZrr:  [ 0.00  0.00 ]
+Key: VPMOVWBZrrk:  [ 0.00  0.00 ]
+Key: VPMOVWBZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVZXBDYrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBDYrr:  [ 0.00  0.00 ]
+Key: VPMOVZXBDZ:  [ 0.00  0.00 ]
+Key: VPMOVZXBDZrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBDZrmk:  [ 0.00  0.00 ]
+Key: VPMOVZXBDZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVZXBDZrr:  [ 0.00  0.00 ]
+Key: VPMOVZXBDZrrk:  [ 0.00  0.00 ]
+Key: VPMOVZXBDZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVZXBDrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBDrr:  [ 0.00  0.00 ]
+Key: VPMOVZXBQYrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBQYrr:  [ 0.00  0.00 ]
+Key: VPMOVZXBQZ:  [ 0.00  0.00 ]
+Key: VPMOVZXBQZrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBQZrmk:  [ 0.00  0.00 ]
+Key: VPMOVZXBQZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVZXBQZrr:  [ 0.00  0.00 ]
+Key: VPMOVZXBQZrrk:  [ 0.00  0.00 ]
+Key: VPMOVZXBQZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVZXBQrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBQrr:  [ 0.00  0.00 ]
+Key: VPMOVZXBWYrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBWYrr:  [ 0.00  0.00 ]
+Key: VPMOVZXBWZ:  [ 0.00  0.00 ]
+Key: VPMOVZXBWZrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBWZrmk:  [ 0.00  0.00 ]
+Key: VPMOVZXBWZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVZXBWZrr:  [ 0.00  0.00 ]
+Key: VPMOVZXBWZrrk:  [ 0.00  0.00 ]
+Key: VPMOVZXBWZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVZXBWrm:  [ 0.00  0.00 ]
+Key: VPMOVZXBWrr:  [ 0.00  0.00 ]
+Key: VPMOVZXDQYrm:  [ 0.00  0.00 ]
+Key: VPMOVZXDQYrr:  [ 0.00  0.00 ]
+Key: VPMOVZXDQZ:  [ 0.00  0.00 ]
+Key: VPMOVZXDQZrm:  [ 0.00  0.00 ]
+Key: VPMOVZXDQZrmk:  [ 0.00  0.00 ]
+Key: VPMOVZXDQZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVZXDQZrr:  [ 0.00  0.00 ]
+Key: VPMOVZXDQZrrk:  [ 0.00  0.00 ]
+Key: VPMOVZXDQZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVZXDQrm:  [ 0.00  0.00 ]
+Key: VPMOVZXDQrr:  [ 0.00  0.00 ]
+Key: VPMOVZXWDYrm:  [ 0.00  0.00 ]
+Key: VPMOVZXWDYrr:  [ 0.00  0.00 ]
+Key: VPMOVZXWDZ:  [ 0.00  0.00 ]
+Key: VPMOVZXWDZrm:  [ 0.00  0.00 ]
+Key: VPMOVZXWDZrmk:  [ 0.00  0.00 ]
+Key: VPMOVZXWDZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVZXWDZrr:  [ 0.00  0.00 ]
+Key: VPMOVZXWDZrrk:  [ 0.00  0.00 ]
+Key: VPMOVZXWDZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVZXWDrm:  [ 0.00  0.00 ]
+Key: VPMOVZXWDrr:  [ 0.00  0.00 ]
+Key: VPMOVZXWQYrm:  [ 0.00  0.00 ]
+Key: VPMOVZXWQYrr:  [ 0.00  0.00 ]
+Key: VPMOVZXWQZ:  [ 0.00  0.00 ]
+Key: VPMOVZXWQZrm:  [ 0.00  0.00 ]
+Key: VPMOVZXWQZrmk:  [ 0.00  0.00 ]
+Key: VPMOVZXWQZrmkz:  [ 0.00  0.00 ]
+Key: VPMOVZXWQZrr:  [ 0.00  0.00 ]
+Key: VPMOVZXWQZrrk:  [ 0.00  0.00 ]
+Key: VPMOVZXWQZrrkz:  [ 0.00  0.00 ]
+Key: VPMOVZXWQrm:  [ 0.00  0.00 ]
+Key: VPMOVZXWQrr:  [ 0.00  0.00 ]
+Key: VPMULDQYrm:  [ 0.00  0.00 ]
+Key: VPMULDQYrr:  [ 0.00  0.00 ]
+Key: VPMULDQZ:  [ 0.00  0.00 ]
+Key: VPMULDQZrm:  [ 0.00  0.00 ]
+Key: VPMULDQZrmb:  [ 0.00  0.00 ]
+Key: VPMULDQZrmbk:  [ 0.00  0.00 ]
+Key: VPMULDQZrmbkz:  [ 0.00  0.00 ]
+Key: VPMULDQZrmk:  [ 0.00  0.00 ]
+Key: VPMULDQZrmkz:  [ 0.00  0.00 ]
+Key: VPMULDQZrr:  [ 0.00  0.00 ]
+Key: VPMULDQZrrk:  [ 0.00  0.00 ]
+Key: VPMULDQZrrkz:  [ 0.00  0.00 ]
+Key: VPMULDQrm:  [ 0.00  0.00 ]
+Key: VPMULDQrr:  [ 0.00  0.00 ]
+Key: VPMULHRSWYrm:  [ 0.00  0.00 ]
+Key: VPMULHRSWYrr:  [ 0.00  0.00 ]
+Key: VPMULHRSWZ:  [ 0.00  0.00 ]
+Key: VPMULHRSWZrm:  [ 0.00  0.00 ]
+Key: VPMULHRSWZrmk:  [ 0.00  0.00 ]
+Key: VPMULHRSWZrmkz:  [ 0.00  0.00 ]
+Key: VPMULHRSWZrr:  [ 0.00  0.00 ]
+Key: VPMULHRSWZrrk:  [ 0.00  0.00 ]
+Key: VPMULHRSWZrrkz:  [ 0.00  0.00 ]
+Key: VPMULHRSWrm:  [ 0.00  0.00 ]
+Key: VPMULHRSWrr:  [ 0.00  0.00 ]
+Key: VPMULHUWYrm:  [ 0.00  0.00 ]
+Key: VPMULHUWYrr:  [ 0.00  0.00 ]
+Key: VPMULHUWZ:  [ 0.00  0.00 ]
+Key: VPMULHUWZrm:  [ 0.00  0.00 ]
+Key: VPMULHUWZrmk:  [ 0.00  0.00 ]
+Key: VPMULHUWZrmkz:  [ 0.00  0.00 ]
+Key: VPMULHUWZrr:  [ 0.00  0.00 ]
+Key: VPMULHUWZrrk:  [ 0.00  0.00 ]
+Key: VPMULHUWZrrkz:  [ 0.00  0.00 ]
+Key: VPMULHUWrm:  [ 0.00  0.00 ]
+Key: VPMULHUWrr:  [ 0.00  0.00 ]
+Key: VPMULHWYrm:  [ 0.00  0.00 ]
+Key: VPMULHWYrr:  [ 0.00  0.00 ]
+Key: VPMULHWZ:  [ 0.00  0.00 ]
+Key: VPMULHWZrm:  [ 0.00  0.00 ]
+Key: VPMULHWZrmk:  [ 0.00  0.00 ]
+Key: VPMULHWZrmkz:  [ 0.00  0.00 ]
+Key: VPMULHWZrr:  [ 0.00  0.00 ]
+Key: VPMULHWZrrk:  [ 0.00  0.00 ]
+Key: VPMULHWZrrkz:  [ 0.00  0.00 ]
+Key: VPMULHWrm:  [ 0.00  0.00 ]
+Key: VPMULHWrr:  [ 0.00  0.00 ]
+Key: VPMULLDYrm:  [ 0.00  0.00 ]
+Key: VPMULLDYrr:  [ 0.00  0.00 ]
+Key: VPMULLDZ:  [ 0.00  0.00 ]
+Key: VPMULLDZrm:  [ 0.00  0.00 ]
+Key: VPMULLDZrmb:  [ 0.00  0.00 ]
+Key: VPMULLDZrmbk:  [ 0.00  0.00 ]
+Key: VPMULLDZrmbkz:  [ 0.00  0.00 ]
+Key: VPMULLDZrmk:  [ 0.00  0.00 ]
+Key: VPMULLDZrmkz:  [ 0.00  0.00 ]
+Key: VPMULLDZrr:  [ 0.00  0.00 ]
+Key: VPMULLDZrrk:  [ 0.00  0.00 ]
+Key: VPMULLDZrrkz:  [ 0.00  0.00 ]
+Key: VPMULLDrm:  [ 0.00  0.00 ]
+Key: VPMULLDrr:  [ 0.00  0.00 ]
+Key: VPMULLQZ:  [ 0.00  0.00 ]
+Key: VPMULLQZrm:  [ 0.00  0.00 ]
+Key: VPMULLQZrmb:  [ 0.00  0.00 ]
+Key: VPMULLQZrmbk:  [ 0.00  0.00 ]
+Key: VPMULLQZrmbkz:  [ 0.00  0.00 ]
+Key: VPMULLQZrmk:  [ 0.00  0.00 ]
+Key: VPMULLQZrmkz:  [ 0.00  0.00 ]
+Key: VPMULLQZrr:  [ 0.00  0.00 ]
+Key: VPMULLQZrrk:  [ 0.00  0.00 ]
+Key: VPMULLQZrrkz:  [ 0.00  0.00 ]
+Key: VPMULLWYrm:  [ 0.00  0.00 ]
+Key: VPMULLWYrr:  [ 0.00  0.00 ]
+Key: VPMULLWZ:  [ 0.00  0.00 ]
+Key: VPMULLWZrm:  [ 0.00  0.00 ]
+Key: VPMULLWZrmk:  [ 0.00  0.00 ]
+Key: VPMULLWZrmkz:  [ 0.00  0.00 ]
+Key: VPMULLWZrr:  [ 0.00  0.00 ]
+Key: VPMULLWZrrk:  [ 0.00  0.00 ]
+Key: VPMULLWZrrkz:  [ 0.00  0.00 ]
+Key: VPMULLWrm:  [ 0.00  0.00 ]
+Key: VPMULLWrr:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZ:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrm:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrmb:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrmbk:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrmbkz:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrmk:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrmkz:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrr:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrrk:  [ 0.00  0.00 ]
+Key: VPMULTISHIFTQBZrrkz:  [ 0.00  0.00 ]
+Key: VPMULUDQYrm:  [ 0.00  0.00 ]
+Key: VPMULUDQYrr:  [ 0.00  0.00 ]
+Key: VPMULUDQZ:  [ 0.00  0.00 ]
+Key: VPMULUDQZrm:  [ 0.00  0.00 ]
+Key: VPMULUDQZrmb:  [ 0.00  0.00 ]
+Key: VPMULUDQZrmbk:  [ 0.00  0.00 ]
+Key: VPMULUDQZrmbkz:  [ 0.00  0.00 ]
+Key: VPMULUDQZrmk:  [ 0.00  0.00 ]
+Key: VPMULUDQZrmkz:  [ 0.00  0.00 ]
+Key: VPMULUDQZrr:  [ 0.00  0.00 ]
+Key: VPMULUDQZrrk:  [ 0.00  0.00 ]
+Key: VPMULUDQZrrkz:  [ 0.00  0.00 ]
+Key: VPMULUDQrm:  [ 0.00  0.00 ]
+Key: VPMULUDQrr:  [ 0.00  0.00 ]
+Key: VPOPCNTBZ:  [ 0.00  0.00 ]
+Key: VPOPCNTBZrm:  [ 0.00  0.00 ]
+Key: VPOPCNTBZrmk:  [ 0.00  0.00 ]
+Key: VPOPCNTBZrmkz:  [ 0.00  0.00 ]
+Key: VPOPCNTBZrr:  [ 0.00  0.00 ]
+Key: VPOPCNTBZrrk:  [ 0.00  0.00 ]
+Key: VPOPCNTBZrrkz:  [ 0.00  0.00 ]
+Key: VPOPCNTDZ:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrm:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrmb:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrmbk:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrmbkz:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrmk:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrmkz:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrr:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrrk:  [ 0.00  0.00 ]
+Key: VPOPCNTDZrrkz:  [ 0.00  0.00 ]
+Key: VPOPCNTQZ:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrm:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrmb:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrmbk:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrmbkz:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrmk:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrmkz:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrr:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrrk:  [ 0.00  0.00 ]
+Key: VPOPCNTQZrrkz:  [ 0.00  0.00 ]
+Key: VPOPCNTWZ:  [ 0.00  0.00 ]
+Key: VPOPCNTWZrm:  [ 0.00  0.00 ]
+Key: VPOPCNTWZrmk:  [ 0.00  0.00 ]
+Key: VPOPCNTWZrmkz:  [ 0.00  0.00 ]
+Key: VPOPCNTWZrr:  [ 0.00  0.00 ]
+Key: VPOPCNTWZrrk:  [ 0.00  0.00 ]
+Key: VPOPCNTWZrrkz:  [ 0.00  0.00 ]
+Key: VPORDZ:  [ 0.00  0.00 ]
+Key: VPORDZrm:  [ 0.00  0.00 ]
+Key: VPORDZrmb:  [ 0.00  0.00 ]
+Key: VPORDZrmbk:  [ 0.00  0.00 ]
+Key: VPORDZrmbkz:  [ 0.00  0.00 ]
+Key: VPORDZrmk:  [ 0.00  0.00 ]
+Key: VPORDZrmkz:  [ 0.00  0.00 ]
+Key: VPORDZrr:  [ 0.00  0.00 ]
+Key: VPORDZrrk:  [ 0.00  0.00 ]
+Key: VPORDZrrkz:  [ 0.00  0.00 ]
+Key: VPORQZ:  [ 0.00  0.00 ]
+Key: VPORQZrm:  [ 0.00  0.00 ]
+Key: VPORQZrmb:  [ 0.00  0.00 ]
+Key: VPORQZrmbk:  [ 0.00  0.00 ]
+Key: VPORQZrmbkz:  [ 0.00  0.00 ]
+Key: VPORQZrmk:  [ 0.00  0.00 ]
+Key: VPORQZrmkz:  [ 0.00  0.00 ]
+Key: VPORQZrr:  [ 0.00  0.00 ]
+Key: VPORQZrrk:  [ 0.00  0.00 ]
+Key: VPORQZrrkz:  [ 0.00  0.00 ]
+Key: VPORYrm:  [ 0.00  0.00 ]
+Key: VPORYrr:  [ 0.00  0.00 ]
+Key: VPORrm:  [ 0.00  0.00 ]
+Key: VPORrr:  [ 0.00  0.00 ]
+Key: VPPERMrmr:  [ 0.00  0.00 ]
+Key: VPPERMrrm:  [ 0.00  0.00 ]
+Key: VPPERMrrr:  [ 0.00  0.00 ]
+Key: VPPERMrrr_REV:  [ 0.00  0.00 ]
+Key: VPROLDZ:  [ 0.00  0.00 ]
+Key: VPROLDZmbi:  [ 0.00  0.00 ]
+Key: VPROLDZmbik:  [ 0.00  0.00 ]
+Key: VPROLDZmbikz:  [ 0.00  0.00 ]
+Key: VPROLDZmi:  [ 0.00  0.00 ]
+Key: VPROLDZmik:  [ 0.00  0.00 ]
+Key: VPROLDZmikz:  [ 0.00  0.00 ]
+Key: VPROLDZri:  [ 0.00  0.00 ]
+Key: VPROLDZrik:  [ 0.00  0.00 ]
+Key: VPROLDZrikz:  [ 0.00  0.00 ]
+Key: VPROLQZ:  [ 0.00  0.00 ]
+Key: VPROLQZmbi:  [ 0.00  0.00 ]
+Key: VPROLQZmbik:  [ 0.00  0.00 ]
+Key: VPROLQZmbikz:  [ 0.00  0.00 ]
+Key: VPROLQZmi:  [ 0.00  0.00 ]
+Key: VPROLQZmik:  [ 0.00  0.00 ]
+Key: VPROLQZmikz:  [ 0.00  0.00 ]
+Key: VPROLQZri:  [ 0.00  0.00 ]
+Key: VPROLQZrik:  [ 0.00  0.00 ]
+Key: VPROLQZrikz:  [ 0.00  0.00 ]
+Key: VPROLVDZ:  [ 0.00  0.00 ]
+Key: VPROLVDZrm:  [ 0.00  0.00 ]
+Key: VPROLVDZrmb:  [ 0.00  0.00 ]
+Key: VPROLVDZrmbk:  [ 0.00  0.00 ]
+Key: VPROLVDZrmbkz:  [ 0.00  0.00 ]
+Key: VPROLVDZrmk:  [ 0.00  0.00 ]
+Key: VPROLVDZrmkz:  [ 0.00  0.00 ]
+Key: VPROLVDZrr:  [ 0.00  0.00 ]
+Key: VPROLVDZrrk:  [ 0.00  0.00 ]
+Key: VPROLVDZrrkz:  [ 0.00  0.00 ]
+Key: VPROLVQZ:  [ 0.00  0.00 ]
+Key: VPROLVQZrm:  [ 0.00  0.00 ]
+Key: VPROLVQZrmb:  [ 0.00  0.00 ]
+Key: VPROLVQZrmbk:  [ 0.00  0.00 ]
+Key: VPROLVQZrmbkz:  [ 0.00  0.00 ]
+Key: VPROLVQZrmk:  [ 0.00  0.00 ]
+Key: VPROLVQZrmkz:  [ 0.00  0.00 ]
+Key: VPROLVQZrr:  [ 0.00  0.00 ]
+Key: VPROLVQZrrk:  [ 0.00  0.00 ]
+Key: VPROLVQZrrkz:  [ 0.00  0.00 ]
+Key: VPRORDZ:  [ 0.00  0.00 ]
+Key: VPRORDZmbi:  [ 0.00  0.00 ]
+Key: VPRORDZmbik:  [ 0.00  0.00 ]
+Key: VPRORDZmbikz:  [ 0.00  0.00 ]
+Key: VPRORDZmi:  [ 0.00  0.00 ]
+Key: VPRORDZmik:  [ 0.00  0.00 ]
+Key: VPRORDZmikz:  [ 0.00  0.00 ]
+Key: VPRORDZri:  [ 0.00  0.00 ]
+Key: VPRORDZrik:  [ 0.00  0.00 ]
+Key: VPRORDZrikz:  [ 0.00  0.00 ]
+Key: VPRORQZ:  [ 0.00  0.00 ]
+Key: VPRORQZmbi:  [ 0.00  0.00 ]
+Key: VPRORQZmbik:  [ 0.00  0.00 ]
+Key: VPRORQZmbikz:  [ 0.00  0.00 ]
+Key: VPRORQZmi:  [ 0.00  0.00 ]
+Key: VPRORQZmik:  [ 0.00  0.00 ]
+Key: VPRORQZmikz:  [ 0.00  0.00 ]
+Key: VPRORQZri:  [ 0.00  0.00 ]
+Key: VPRORQZrik:  [ 0.00  0.00 ]
+Key: VPRORQZrikz:  [ 0.00  0.00 ]
+Key: VPRORVDZ:  [ 0.00  0.00 ]
+Key: VPRORVDZrm:  [ 0.00  0.00 ]
+Key: VPRORVDZrmb:  [ 0.00  0.00 ]
+Key: VPRORVDZrmbk:  [ 0.00  0.00 ]
+Key: VPRORVDZrmbkz:  [ 0.00  0.00 ]
+Key: VPRORVDZrmk:  [ 0.00  0.00 ]
+Key: VPRORVDZrmkz:  [ 0.00  0.00 ]
+Key: VPRORVDZrr:  [ 0.00  0.00 ]
+Key: VPRORVDZrrk:  [ 0.00  0.00 ]
+Key: VPRORVDZrrkz:  [ 0.00  0.00 ]
+Key: VPRORVQZ:  [ 0.00  0.00 ]
+Key: VPRORVQZrm:  [ 0.00  0.00 ]
+Key: VPRORVQZrmb:  [ 0.00  0.00 ]
+Key: VPRORVQZrmbk:  [ 0.00  0.00 ]
+Key: VPRORVQZrmbkz:  [ 0.00  0.00 ]
+Key: VPRORVQZrmk:  [ 0.00  0.00 ]
+Key: VPRORVQZrmkz:  [ 0.00  0.00 ]
+Key: VPRORVQZrr:  [ 0.00  0.00 ]
+Key: VPRORVQZrrk:  [ 0.00  0.00 ]
+Key: VPRORVQZrrkz:  [ 0.00  0.00 ]
+Key: VPROTBmi:  [ 0.00  0.00 ]
+Key: VPROTBmr:  [ 0.00  0.00 ]
+Key: VPROTBri:  [ 0.00  0.00 ]
+Key: VPROTBrm:  [ 0.00  0.00 ]
+Key: VPROTBrr:  [ 0.00  0.00 ]
+Key: VPROTBrr_REV:  [ 0.00  0.00 ]
+Key: VPROTDmi:  [ 0.00  0.00 ]
+Key: VPROTDmr:  [ 0.00  0.00 ]
+Key: VPROTDri:  [ 0.00  0.00 ]
+Key: VPROTDrm:  [ 0.00  0.00 ]
+Key: VPROTDrr:  [ 0.00  0.00 ]
+Key: VPROTDrr_REV:  [ 0.00  0.00 ]
+Key: VPROTQmi:  [ 0.00  0.00 ]
+Key: VPROTQmr:  [ 0.00  0.00 ]
+Key: VPROTQri:  [ 0.00  0.00 ]
+Key: VPROTQrm:  [ 0.00  0.00 ]
+Key: VPROTQrr:  [ 0.00  0.00 ]
+Key: VPROTQrr_REV:  [ 0.00  0.00 ]
+Key: VPROTWmi:  [ 0.00  0.00 ]
+Key: VPROTWmr:  [ 0.00  0.00 ]
+Key: VPROTWri:  [ 0.00  0.00 ]
+Key: VPROTWrm:  [ 0.00  0.00 ]
+Key: VPROTWrr:  [ 0.00  0.00 ]
+Key: VPROTWrr_REV:  [ 0.00  0.00 ]
+Key: VPSADBWYrm:  [ 0.00  0.00 ]
+Key: VPSADBWYrr:  [ 0.00  0.00 ]
+Key: VPSADBWZ:  [ 0.00  0.00 ]
+Key: VPSADBWZrm:  [ 0.00  0.00 ]
+Key: VPSADBWZrr:  [ 0.00  0.00 ]
+Key: VPSADBWrm:  [ 0.00  0.00 ]
+Key: VPSADBWrr:  [ 0.00  0.00 ]
+Key: VPSCATTERDDZ:  [ 0.00  0.00 ]
+Key: VPSCATTERDDZmr:  [ 0.00  0.00 ]
+Key: VPSCATTERDQZ:  [ 0.00  0.00 ]
+Key: VPSCATTERDQZmr:  [ 0.00  0.00 ]
+Key: VPSCATTERQDZ:  [ 0.00  0.00 ]
+Key: VPSCATTERQDZmr:  [ 0.00  0.00 ]
+Key: VPSCATTERQQZ:  [ 0.00  0.00 ]
+Key: VPSCATTERQQZmr:  [ 0.00  0.00 ]
+Key: VPSHABmr:  [ 0.00  0.00 ]
+Key: VPSHABrm:  [ 0.00  0.00 ]
+Key: VPSHABrr:  [ 0.00  0.00 ]
+Key: VPSHABrr_REV:  [ 0.00  0.00 ]
+Key: VPSHADmr:  [ 0.00  0.00 ]
+Key: VPSHADrm:  [ 0.00  0.00 ]
+Key: VPSHADrr:  [ 0.00  0.00 ]
+Key: VPSHADrr_REV:  [ 0.00  0.00 ]
+Key: VPSHAQmr:  [ 0.00  0.00 ]
+Key: VPSHAQrm:  [ 0.00  0.00 ]
+Key: VPSHAQrr:  [ 0.00  0.00 ]
+Key: VPSHAQrr_REV:  [ 0.00  0.00 ]
+Key: VPSHAWmr:  [ 0.00  0.00 ]
+Key: VPSHAWrm:  [ 0.00  0.00 ]
+Key: VPSHAWrr:  [ 0.00  0.00 ]
+Key: VPSHAWrr_REV:  [ 0.00  0.00 ]
+Key: VPSHLBmr:  [ 0.00  0.00 ]
+Key: VPSHLBrm:  [ 0.00  0.00 ]
+Key: VPSHLBrr:  [ 0.00  0.00 ]
+Key: VPSHLBrr_REV:  [ 0.00  0.00 ]
+Key: VPSHLDDZ:  [ 0.00  0.00 ]
+Key: VPSHLDDZrmbi:  [ 0.00  0.00 ]
+Key: VPSHLDDZrmbik:  [ 0.00  0.00 ]
+Key: VPSHLDDZrmbikz:  [ 0.00  0.00 ]
+Key: VPSHLDDZrmi:  [ 0.00  0.00 ]
+Key: VPSHLDDZrmik:  [ 0.00  0.00 ]
+Key: VPSHLDDZrmikz:  [ 0.00  0.00 ]
+Key: VPSHLDDZrri:  [ 0.00  0.00 ]
+Key: VPSHLDDZrrik:  [ 0.00  0.00 ]
+Key: VPSHLDDZrrikz:  [ 0.00  0.00 ]
+Key: VPSHLDQZ:  [ 0.00  0.00 ]
+Key: VPSHLDQZrmbi:  [ 0.00  0.00 ]
+Key: VPSHLDQZrmbik:  [ 0.00  0.00 ]
+Key: VPSHLDQZrmbikz:  [ 0.00  0.00 ]
+Key: VPSHLDQZrmi:  [ 0.00  0.00 ]
+Key: VPSHLDQZrmik:  [ 0.00  0.00 ]
+Key: VPSHLDQZrmikz:  [ 0.00  0.00 ]
+Key: VPSHLDQZrri:  [ 0.00  0.00 ]
+Key: VPSHLDQZrrik:  [ 0.00  0.00 ]
+Key: VPSHLDQZrrikz:  [ 0.00  0.00 ]
+Key: VPSHLDVDZ:  [ 0.00  0.00 ]
+Key: VPSHLDVDZm:  [ 0.00  0.00 ]
+Key: VPSHLDVDZmb:  [ 0.00  0.00 ]
+Key: VPSHLDVDZmbk:  [ 0.00  0.00 ]
+Key: VPSHLDVDZmbkz:  [ 0.00  0.00 ]
+Key: VPSHLDVDZmk:  [ 0.00  0.00 ]
+Key: VPSHLDVDZmkz:  [ 0.00  0.00 ]
+Key: VPSHLDVDZr:  [ 0.00  0.00 ]
+Key: VPSHLDVDZrk:  [ 0.00  0.00 ]
+Key: VPSHLDVDZrkz:  [ 0.00  0.00 ]
+Key: VPSHLDVQZ:  [ 0.00  0.00 ]
+Key: VPSHLDVQZm:  [ 0.00  0.00 ]
+Key: VPSHLDVQZmb:  [ 0.00  0.00 ]
+Key: VPSHLDVQZmbk:  [ 0.00  0.00 ]
+Key: VPSHLDVQZmbkz:  [ 0.00  0.00 ]
+Key: VPSHLDVQZmk:  [ 0.00  0.00 ]
+Key: VPSHLDVQZmkz:  [ 0.00  0.00 ]
+Key: VPSHLDVQZr:  [ 0.00  0.00 ]
+Key: VPSHLDVQZrk:  [ 0.00  0.00 ]
+Key: VPSHLDVQZrkz:  [ 0.00  0.00 ]
+Key: VPSHLDVWZ:  [ 0.00  0.00 ]
+Key: VPSHLDVWZm:  [ 0.00  0.00 ]
+Key: VPSHLDVWZmk:  [ 0.00  0.00 ]
+Key: VPSHLDVWZmkz:  [ 0.00  0.00 ]
+Key: VPSHLDVWZr:  [ 0.00  0.00 ]
+Key: VPSHLDVWZrk:  [ 0.00  0.00 ]
+Key: VPSHLDVWZrkz:  [ 0.00  0.00 ]
+Key: VPSHLDWZ:  [ 0.00  0.00 ]
+Key: VPSHLDWZrmi:  [ 0.00  0.00 ]
+Key: VPSHLDWZrmik:  [ 0.00  0.00 ]
+Key: VPSHLDWZrmikz:  [ 0.00  0.00 ]
+Key: VPSHLDWZrri:  [ 0.00  0.00 ]
+Key: VPSHLDWZrrik:  [ 0.00  0.00 ]
+Key: VPSHLDWZrrikz:  [ 0.00  0.00 ]
+Key: VPSHLDmr:  [ 0.00  0.00 ]
+Key: VPSHLDrm:  [ 0.00  0.00 ]
+Key: VPSHLDrr:  [ 0.00  0.00 ]
+Key: VPSHLDrr_REV:  [ 0.00  0.00 ]
+Key: VPSHLQmr:  [ 0.00  0.00 ]
+Key: VPSHLQrm:  [ 0.00  0.00 ]
+Key: VPSHLQrr:  [ 0.00  0.00 ]
+Key: VPSHLQrr_REV:  [ 0.00  0.00 ]
+Key: VPSHLWmr:  [ 0.00  0.00 ]
+Key: VPSHLWrm:  [ 0.00  0.00 ]
+Key: VPSHLWrr:  [ 0.00  0.00 ]
+Key: VPSHLWrr_REV:  [ 0.00  0.00 ]
+Key: VPSHRDDZ:  [ 0.00  0.00 ]
+Key: VPSHRDDZrmbi:  [ 0.00  0.00 ]
+Key: VPSHRDDZrmbik:  [ 0.00  0.00 ]
+Key: VPSHRDDZrmbikz:  [ 0.00  0.00 ]
+Key: VPSHRDDZrmi:  [ 0.00  0.00 ]
+Key: VPSHRDDZrmik:  [ 0.00  0.00 ]
+Key: VPSHRDDZrmikz:  [ 0.00  0.00 ]
+Key: VPSHRDDZrri:  [ 0.00  0.00 ]
+Key: VPSHRDDZrrik:  [ 0.00  0.00 ]
+Key: VPSHRDDZrrikz:  [ 0.00  0.00 ]
+Key: VPSHRDQZ:  [ 0.00  0.00 ]
+Key: VPSHRDQZrmbi:  [ 0.00  0.00 ]
+Key: VPSHRDQZrmbik:  [ 0.00  0.00 ]
+Key: VPSHRDQZrmbikz:  [ 0.00  0.00 ]
+Key: VPSHRDQZrmi:  [ 0.00  0.00 ]
+Key: VPSHRDQZrmik:  [ 0.00  0.00 ]
+Key: VPSHRDQZrmikz:  [ 0.00  0.00 ]
+Key: VPSHRDQZrri:  [ 0.00  0.00 ]
+Key: VPSHRDQZrrik:  [ 0.00  0.00 ]
+Key: VPSHRDQZrrikz:  [ 0.00  0.00 ]
+Key: VPSHRDVDZ:  [ 0.00  0.00 ]
+Key: VPSHRDVDZm:  [ 0.00  0.00 ]
+Key: VPSHRDVDZmb:  [ 0.00  0.00 ]
+Key: VPSHRDVDZmbk:  [ 0.00  0.00 ]
+Key: VPSHRDVDZmbkz:  [ 0.00  0.00 ]
+Key: VPSHRDVDZmk:  [ 0.00  0.00 ]
+Key: VPSHRDVDZmkz:  [ 0.00  0.00 ]
+Key: VPSHRDVDZr:  [ 0.00  0.00 ]
+Key: VPSHRDVDZrk:  [ 0.00  0.00 ]
+Key: VPSHRDVDZrkz:  [ 0.00  0.00 ]
+Key: VPSHRDVQZ:  [ 0.00  0.00 ]
+Key: VPSHRDVQZm:  [ 0.00  0.00 ]
+Key: VPSHRDVQZmb:  [ 0.00  0.00 ]
+Key: VPSHRDVQZmbk:  [ 0.00  0.00 ]
+Key: VPSHRDVQZmbkz:  [ 0.00  0.00 ]
+Key: VPSHRDVQZmk:  [ 0.00  0.00 ]
+Key: VPSHRDVQZmkz:  [ 0.00  0.00 ]
+Key: VPSHRDVQZr:  [ 0.00  0.00 ]
+Key: VPSHRDVQZrk:  [ 0.00  0.00 ]
+Key: VPSHRDVQZrkz:  [ 0.00  0.00 ]
+Key: VPSHRDVWZ:  [ 0.00  0.00 ]
+Key: VPSHRDVWZm:  [ 0.00  0.00 ]
+Key: VPSHRDVWZmk:  [ 0.00  0.00 ]
+Key: VPSHRDVWZmkz:  [ 0.00  0.00 ]
+Key: VPSHRDVWZr:  [ 0.00  0.00 ]
+Key: VPSHRDVWZrk:  [ 0.00  0.00 ]
+Key: VPSHRDVWZrkz:  [ 0.00  0.00 ]
+Key: VPSHRDWZ:  [ 0.00  0.00 ]
+Key: VPSHRDWZrmi:  [ 0.00  0.00 ]
+Key: VPSHRDWZrmik:  [ 0.00  0.00 ]
+Key: VPSHRDWZrmikz:  [ 0.00  0.00 ]
+Key: VPSHRDWZrri:  [ 0.00  0.00 ]
+Key: VPSHRDWZrrik:  [ 0.00  0.00 ]
+Key: VPSHRDWZrrikz:  [ 0.00  0.00 ]
+Key: VPSHUFBITQMBZ:  [ 0.00  0.00 ]
+Key: VPSHUFBITQMBZrm:  [ 0.00  0.00 ]
+Key: VPSHUFBITQMBZrmk:  [ 0.00  0.00 ]
+Key: VPSHUFBITQMBZrr:  [ 0.00  0.00 ]
+Key: VPSHUFBITQMBZrrk:  [ 0.00  0.00 ]
+Key: VPSHUFBYrm:  [ 0.00  0.00 ]
+Key: VPSHUFBYrr:  [ 0.00  0.00 ]
+Key: VPSHUFBZ:  [ 0.00  0.00 ]
+Key: VPSHUFBZrm:  [ 0.00  0.00 ]
+Key: VPSHUFBZrmk:  [ 0.00  0.00 ]
+Key: VPSHUFBZrmkz:  [ 0.00  0.00 ]
+Key: VPSHUFBZrr:  [ 0.00  0.00 ]
+Key: VPSHUFBZrrk:  [ 0.00  0.00 ]
+Key: VPSHUFBZrrkz:  [ 0.00  0.00 ]
+Key: VPSHUFBrm:  [ 0.00  0.00 ]
+Key: VPSHUFBrr:  [ 0.00  0.00 ]
+Key: VPSHUFDYmi:  [ 0.00  0.00 ]
+Key: VPSHUFDYri:  [ 0.00  0.00 ]
+Key: VPSHUFDZ:  [ 0.00  0.00 ]
+Key: VPSHUFDZmbi:  [ 0.00  0.00 ]
+Key: VPSHUFDZmbik:  [ 0.00  0.00 ]
+Key: VPSHUFDZmbikz:  [ 0.00  0.00 ]
+Key: VPSHUFDZmi:  [ 0.00  0.00 ]
+Key: VPSHUFDZmik:  [ 0.00  0.00 ]
+Key: VPSHUFDZmikz:  [ 0.00  0.00 ]
+Key: VPSHUFDZri:  [ 0.00  0.00 ]
+Key: VPSHUFDZrik:  [ 0.00  0.00 ]
+Key: VPSHUFDZrikz:  [ 0.00  0.00 ]
+Key: VPSHUFDmi:  [ 0.00  0.00 ]
+Key: VPSHUFDri:  [ 0.00  0.00 ]
+Key: VPSHUFHWYmi:  [ 0.00  0.00 ]
+Key: VPSHUFHWYri:  [ 0.00  0.00 ]
+Key: VPSHUFHWZ:  [ 0.00  0.00 ]
+Key: VPSHUFHWZmi:  [ 0.00  0.00 ]
+Key: VPSHUFHWZmik:  [ 0.00  0.00 ]
+Key: VPSHUFHWZmikz:  [ 0.00  0.00 ]
+Key: VPSHUFHWZri:  [ 0.00  0.00 ]
+Key: VPSHUFHWZrik:  [ 0.00  0.00 ]
+Key: VPSHUFHWZrikz:  [ 0.00  0.00 ]
+Key: VPSHUFHWmi:  [ 0.00  0.00 ]
+Key: VPSHUFHWri:  [ 0.00  0.00 ]
+Key: VPSHUFLWYmi:  [ 0.00  0.00 ]
+Key: VPSHUFLWYri:  [ 0.00  0.00 ]
+Key: VPSHUFLWZ:  [ 0.00  0.00 ]
+Key: VPSHUFLWZmi:  [ 0.00  0.00 ]
+Key: VPSHUFLWZmik:  [ 0.00  0.00 ]
+Key: VPSHUFLWZmikz:  [ 0.00  0.00 ]
+Key: VPSHUFLWZri:  [ 0.00  0.00 ]
+Key: VPSHUFLWZrik:  [ 0.00  0.00 ]
+Key: VPSHUFLWZrikz:  [ 0.00  0.00 ]
+Key: VPSHUFLWmi:  [ 0.00  0.00 ]
+Key: VPSHUFLWri:  [ 0.00  0.00 ]
+Key: VPSIGNBYrm:  [ 0.00  0.00 ]
+Key: VPSIGNBYrr:  [ 0.00  0.00 ]
+Key: VPSIGNBrm:  [ 0.00  0.00 ]
+Key: VPSIGNBrr:  [ 0.00  0.00 ]
+Key: VPSIGNDYrm:  [ 0.00  0.00 ]
+Key: VPSIGNDYrr:  [ 0.00  0.00 ]
+Key: VPSIGNDrm:  [ 0.00  0.00 ]
+Key: VPSIGNDrr:  [ 0.00  0.00 ]
+Key: VPSIGNWYrm:  [ 0.00  0.00 ]
+Key: VPSIGNWYrr:  [ 0.00  0.00 ]
+Key: VPSIGNWrm:  [ 0.00  0.00 ]
+Key: VPSIGNWrr:  [ 0.00  0.00 ]
+Key: VPSLLDQYri:  [ 0.00  0.00 ]
+Key: VPSLLDQZ:  [ 0.00  0.00 ]
+Key: VPSLLDQZmi:  [ 0.00  0.00 ]
+Key: VPSLLDQZri:  [ 0.00  0.00 ]
+Key: VPSLLDQri:  [ 0.00  0.00 ]
+Key: VPSLLDYri:  [ 0.00  0.00 ]
+Key: VPSLLDYrm:  [ 0.00  0.00 ]
+Key: VPSLLDYrr:  [ 0.00  0.00 ]
+Key: VPSLLDZ:  [ 0.00  0.00 ]
+Key: VPSLLDZmbi:  [ 0.00  0.00 ]
+Key: VPSLLDZmbik:  [ 0.00  0.00 ]
+Key: VPSLLDZmbikz:  [ 0.00  0.00 ]
+Key: VPSLLDZmi:  [ 0.00  0.00 ]
+Key: VPSLLDZmik:  [ 0.00  0.00 ]
+Key: VPSLLDZmikz:  [ 0.00  0.00 ]
+Key: VPSLLDZri:  [ 0.00  0.00 ]
+Key: VPSLLDZrik:  [ 0.00  0.00 ]
+Key: VPSLLDZrikz:  [ 0.00  0.00 ]
+Key: VPSLLDZrm:  [ 0.00  0.00 ]
+Key: VPSLLDZrmk:  [ 0.00  0.00 ]
+Key: VPSLLDZrmkz:  [ 0.00  0.00 ]
+Key: VPSLLDZrr:  [ 0.00  0.00 ]
+Key: VPSLLDZrrk:  [ 0.00  0.00 ]
+Key: VPSLLDZrrkz:  [ 0.00  0.00 ]
+Key: VPSLLDri:  [ 0.00  0.00 ]
+Key: VPSLLDrm:  [ 0.00  0.00 ]
+Key: VPSLLDrr:  [ 0.00  0.00 ]
+Key: VPSLLQYri:  [ 0.00  0.00 ]
+Key: VPSLLQYrm:  [ 0.00  0.00 ]
+Key: VPSLLQYrr:  [ 0.00  0.00 ]
+Key: VPSLLQZ:  [ 0.00  0.00 ]
+Key: VPSLLQZmbi:  [ 0.00  0.00 ]
+Key: VPSLLQZmbik:  [ 0.00  0.00 ]
+Key: VPSLLQZmbikz:  [ 0.00  0.00 ]
+Key: VPSLLQZmi:  [ 0.00  0.00 ]
+Key: VPSLLQZmik:  [ 0.00  0.00 ]
+Key: VPSLLQZmikz:  [ 0.00  0.00 ]
+Key: VPSLLQZri:  [ 0.00  0.00 ]
+Key: VPSLLQZrik:  [ 0.00  0.00 ]
+Key: VPSLLQZrikz:  [ 0.00  0.00 ]
+Key: VPSLLQZrm:  [ 0.00  0.00 ]
+Key: VPSLLQZrmk:  [ 0.00  0.00 ]
+Key: VPSLLQZrmkz:  [ 0.00  0.00 ]
+Key: VPSLLQZrr:  [ 0.00  0.00 ]
+Key: VPSLLQZrrk:  [ 0.00  0.00 ]
+Key: VPSLLQZrrkz:  [ 0.00  0.00 ]
+Key: VPSLLQri:  [ 0.00  0.00 ]
+Key: VPSLLQrm:  [ 0.00  0.00 ]
+Key: VPSLLQrr:  [ 0.00  0.00 ]
+Key: VPSLLVDYrm:  [ 0.00  0.00 ]
+Key: VPSLLVDYrr:  [ 0.00  0.00 ]
+Key: VPSLLVDZ:  [ 0.00  0.00 ]
+Key: VPSLLVDZrm:  [ 0.00  0.00 ]
+Key: VPSLLVDZrmb:  [ 0.00  0.00 ]
+Key: VPSLLVDZrmbk:  [ 0.00  0.00 ]
+Key: VPSLLVDZrmbkz:  [ 0.00  0.00 ]
+Key: VPSLLVDZrmk:  [ 0.00  0.00 ]
+Key: VPSLLVDZrmkz:  [ 0.00  0.00 ]
+Key: VPSLLVDZrr:  [ 0.00  0.00 ]
+Key: VPSLLVDZrrk:  [ 0.00  0.00 ]
+Key: VPSLLVDZrrkz:  [ 0.00  0.00 ]
+Key: VPSLLVDrm:  [ 0.00  0.00 ]
+Key: VPSLLVDrr:  [ 0.00  0.00 ]
+Key: VPSLLVQYrm:  [ 0.00  0.00 ]
+Key: VPSLLVQYrr:  [ 0.00  0.00 ]
+Key: VPSLLVQZ:  [ 0.00  0.00 ]
+Key: VPSLLVQZrm:  [ 0.00  0.00 ]
+Key: VPSLLVQZrmb:  [ 0.00  0.00 ]
+Key: VPSLLVQZrmbk:  [ 0.00  0.00 ]
+Key: VPSLLVQZrmbkz:  [ 0.00  0.00 ]
+Key: VPSLLVQZrmk:  [ 0.00  0.00 ]
+Key: VPSLLVQZrmkz:  [ 0.00  0.00 ]
+Key: VPSLLVQZrr:  [ 0.00  0.00 ]
+Key: VPSLLVQZrrk:  [ 0.00  0.00 ]
+Key: VPSLLVQZrrkz:  [ 0.00  0.00 ]
+Key: VPSLLVQrm:  [ 0.00  0.00 ]
+Key: VPSLLVQrr:  [ 0.00  0.00 ]
+Key: VPSLLVWZ:  [ 0.00  0.00 ]
+Key: VPSLLVWZrm:  [ 0.00  0.00 ]
+Key: VPSLLVWZrmk:  [ 0.00  0.00 ]
+Key: VPSLLVWZrmkz:  [ 0.00  0.00 ]
+Key: VPSLLVWZrr:  [ 0.00  0.00 ]
+Key: VPSLLVWZrrk:  [ 0.00  0.00 ]
+Key: VPSLLVWZrrkz:  [ 0.00  0.00 ]
+Key: VPSLLWYri:  [ 0.00  0.00 ]
+Key: VPSLLWYrm:  [ 0.00  0.00 ]
+Key: VPSLLWYrr:  [ 0.00  0.00 ]
+Key: VPSLLWZ:  [ 0.00  0.00 ]
+Key: VPSLLWZmi:  [ 0.00  0.00 ]
+Key: VPSLLWZmik:  [ 0.00  0.00 ]
+Key: VPSLLWZmikz:  [ 0.00  0.00 ]
+Key: VPSLLWZri:  [ 0.00  0.00 ]
+Key: VPSLLWZrik:  [ 0.00  0.00 ]
+Key: VPSLLWZrikz:  [ 0.00  0.00 ]
+Key: VPSLLWZrm:  [ 0.00  0.00 ]
+Key: VPSLLWZrmk:  [ 0.00  0.00 ]
+Key: VPSLLWZrmkz:  [ 0.00  0.00 ]
+Key: VPSLLWZrr:  [ 0.00  0.00 ]
+Key: VPSLLWZrrk:  [ 0.00  0.00 ]
+Key: VPSLLWZrrkz:  [ 0.00  0.00 ]
+Key: VPSLLWri:  [ 0.00  0.00 ]
+Key: VPSLLWrm:  [ 0.00  0.00 ]
+Key: VPSLLWrr:  [ 0.00  0.00 ]
+Key: VPSRADYri:  [ 0.00  0.00 ]
+Key: VPSRADYrm:  [ 0.00  0.00 ]
+Key: VPSRADYrr:  [ 0.00  0.00 ]
+Key: VPSRADZ:  [ 0.00  0.00 ]
+Key: VPSRADZmbi:  [ 0.00  0.00 ]
+Key: VPSRADZmbik:  [ 0.00  0.00 ]
+Key: VPSRADZmbikz:  [ 0.00  0.00 ]
+Key: VPSRADZmi:  [ 0.00  0.00 ]
+Key: VPSRADZmik:  [ 0.00  0.00 ]
+Key: VPSRADZmikz:  [ 0.00  0.00 ]
+Key: VPSRADZri:  [ 0.00  0.00 ]
+Key: VPSRADZrik:  [ 0.00  0.00 ]
+Key: VPSRADZrikz:  [ 0.00  0.00 ]
+Key: VPSRADZrm:  [ 0.00  0.00 ]
+Key: VPSRADZrmk:  [ 0.00  0.00 ]
+Key: VPSRADZrmkz:  [ 0.00  0.00 ]
+Key: VPSRADZrr:  [ 0.00  0.00 ]
+Key: VPSRADZrrk:  [ 0.00  0.00 ]
+Key: VPSRADZrrkz:  [ 0.00  0.00 ]
+Key: VPSRADri:  [ 0.00  0.00 ]
+Key: VPSRADrm:  [ 0.00  0.00 ]
+Key: VPSRADrr:  [ 0.00  0.00 ]
+Key: VPSRAQZ:  [ 0.00  0.00 ]
+Key: VPSRAQZmbi:  [ 0.00  0.00 ]
+Key: VPSRAQZmbik:  [ 0.00  0.00 ]
+Key: VPSRAQZmbikz:  [ 0.00  0.00 ]
+Key: VPSRAQZmi:  [ 0.00  0.00 ]
+Key: VPSRAQZmik:  [ 0.00  0.00 ]
+Key: VPSRAQZmikz:  [ 0.00  0.00 ]
+Key: VPSRAQZri:  [ 0.00  0.00 ]
+Key: VPSRAQZrik:  [ 0.00  0.00 ]
+Key: VPSRAQZrikz:  [ 0.00  0.00 ]
+Key: VPSRAQZrm:  [ 0.00  0.00 ]
+Key: VPSRAQZrmk:  [ 0.00  0.00 ]
+Key: VPSRAQZrmkz:  [ 0.00  0.00 ]
+Key: VPSRAQZrr:  [ 0.00  0.00 ]
+Key: VPSRAQZrrk:  [ 0.00  0.00 ]
+Key: VPSRAQZrrkz:  [ 0.00  0.00 ]
+Key: VPSRAVDYrm:  [ 0.00  0.00 ]
+Key: VPSRAVDYrr:  [ 0.00  0.00 ]
+Key: VPSRAVDZ:  [ 0.00  0.00 ]
+Key: VPSRAVDZrm:  [ 0.00  0.00 ]
+Key: VPSRAVDZrmb:  [ 0.00  0.00 ]
+Key: VPSRAVDZrmbk:  [ 0.00  0.00 ]
+Key: VPSRAVDZrmbkz:  [ 0.00  0.00 ]
+Key: VPSRAVDZrmk:  [ 0.00  0.00 ]
+Key: VPSRAVDZrmkz:  [ 0.00  0.00 ]
+Key: VPSRAVDZrr:  [ 0.00  0.00 ]
+Key: VPSRAVDZrrk:  [ 0.00  0.00 ]
+Key: VPSRAVDZrrkz:  [ 0.00  0.00 ]
+Key: VPSRAVDrm:  [ 0.00  0.00 ]
+Key: VPSRAVDrr:  [ 0.00  0.00 ]
+Key: VPSRAVQZ:  [ 0.00  0.00 ]
+Key: VPSRAVQZrm:  [ 0.00  0.00 ]
+Key: VPSRAVQZrmb:  [ 0.00  0.00 ]
+Key: VPSRAVQZrmbk:  [ 0.00  0.00 ]
+Key: VPSRAVQZrmbkz:  [ 0.00  0.00 ]
+Key: VPSRAVQZrmk:  [ 0.00  0.00 ]
+Key: VPSRAVQZrmkz:  [ 0.00  0.00 ]
+Key: VPSRAVQZrr:  [ 0.00  0.00 ]
+Key: VPSRAVQZrrk:  [ 0.00  0.00 ]
+Key: VPSRAVQZrrkz:  [ 0.00  0.00 ]
+Key: VPSRAVWZ:  [ 0.00  0.00 ]
+Key: VPSRAVWZrm:  [ 0.00  0.00 ]
+Key: VPSRAVWZrmk:  [ 0.00  0.00 ]
+Key: VPSRAVWZrmkz:  [ 0.00  0.00 ]
+Key: VPSRAVWZrr:  [ 0.00  0.00 ]
+Key: VPSRAVWZrrk:  [ 0.00  0.00 ]
+Key: VPSRAVWZrrkz:  [ 0.00  0.00 ]
+Key: VPSRAWYri:  [ 0.00  0.00 ]
+Key: VPSRAWYrm:  [ 0.00  0.00 ]
+Key: VPSRAWYrr:  [ 0.00  0.00 ]
+Key: VPSRAWZ:  [ 0.00  0.00 ]
+Key: VPSRAWZmi:  [ 0.00  0.00 ]
+Key: VPSRAWZmik:  [ 0.00  0.00 ]
+Key: VPSRAWZmikz:  [ 0.00  0.00 ]
+Key: VPSRAWZri:  [ 0.00  0.00 ]
+Key: VPSRAWZrik:  [ 0.00  0.00 ]
+Key: VPSRAWZrikz:  [ 0.00  0.00 ]
+Key: VPSRAWZrm:  [ 0.00  0.00 ]
+Key: VPSRAWZrmk:  [ 0.00  0.00 ]
+Key: VPSRAWZrmkz:  [ 0.00  0.00 ]
+Key: VPSRAWZrr:  [ 0.00  0.00 ]
+Key: VPSRAWZrrk:  [ 0.00  0.00 ]
+Key: VPSRAWZrrkz:  [ 0.00  0.00 ]
+Key: VPSRAWri:  [ 0.00  0.00 ]
+Key: VPSRAWrm:  [ 0.00  0.00 ]
+Key: VPSRAWrr:  [ 0.00  0.00 ]
+Key: VPSRLDQYri:  [ 0.00  0.00 ]
+Key: VPSRLDQZ:  [ 0.00  0.00 ]
+Key: VPSRLDQZmi:  [ 0.00  0.00 ]
+Key: VPSRLDQZri:  [ 0.00  0.00 ]
+Key: VPSRLDQri:  [ 0.00  0.00 ]
+Key: VPSRLDYri:  [ 0.00  0.00 ]
+Key: VPSRLDYrm:  [ 0.00  0.00 ]
+Key: VPSRLDYrr:  [ 0.00  0.00 ]
+Key: VPSRLDZ:  [ 0.00  0.00 ]
+Key: VPSRLDZmbi:  [ 0.00  0.00 ]
+Key: VPSRLDZmbik:  [ 0.00  0.00 ]
+Key: VPSRLDZmbikz:  [ 0.00  0.00 ]
+Key: VPSRLDZmi:  [ 0.00  0.00 ]
+Key: VPSRLDZmik:  [ 0.00  0.00 ]
+Key: VPSRLDZmikz:  [ 0.00  0.00 ]
+Key: VPSRLDZri:  [ 0.00  0.00 ]
+Key: VPSRLDZrik:  [ 0.00  0.00 ]
+Key: VPSRLDZrikz:  [ 0.00  0.00 ]
+Key: VPSRLDZrm:  [ 0.00  0.00 ]
+Key: VPSRLDZrmk:  [ 0.00  0.00 ]
+Key: VPSRLDZrmkz:  [ 0.00  0.00 ]
+Key: VPSRLDZrr:  [ 0.00  0.00 ]
+Key: VPSRLDZrrk:  [ 0.00  0.00 ]
+Key: VPSRLDZrrkz:  [ 0.00  0.00 ]
+Key: VPSRLDri:  [ 0.00  0.00 ]
+Key: VPSRLDrm:  [ 0.00  0.00 ]
+Key: VPSRLDrr:  [ 0.00  0.00 ]
+Key: VPSRLQYri:  [ 0.00  0.00 ]
+Key: VPSRLQYrm:  [ 0.00  0.00 ]
+Key: VPSRLQYrr:  [ 0.00  0.00 ]
+Key: VPSRLQZ:  [ 0.00  0.00 ]
+Key: VPSRLQZmbi:  [ 0.00  0.00 ]
+Key: VPSRLQZmbik:  [ 0.00  0.00 ]
+Key: VPSRLQZmbikz:  [ 0.00  0.00 ]
+Key: VPSRLQZmi:  [ 0.00  0.00 ]
+Key: VPSRLQZmik:  [ 0.00  0.00 ]
+Key: VPSRLQZmikz:  [ 0.00  0.00 ]
+Key: VPSRLQZri:  [ 0.00  0.00 ]
+Key: VPSRLQZrik:  [ 0.00  0.00 ]
+Key: VPSRLQZrikz:  [ 0.00  0.00 ]
+Key: VPSRLQZrm:  [ 0.00  0.00 ]
+Key: VPSRLQZrmk:  [ 0.00  0.00 ]
+Key: VPSRLQZrmkz:  [ 0.00  0.00 ]
+Key: VPSRLQZrr:  [ 0.00  0.00 ]
+Key: VPSRLQZrrk:  [ 0.00  0.00 ]
+Key: VPSRLQZrrkz:  [ 0.00  0.00 ]
+Key: VPSRLQri:  [ 0.00  0.00 ]
+Key: VPSRLQrm:  [ 0.00  0.00 ]
+Key: VPSRLQrr:  [ 0.00  0.00 ]
+Key: VPSRLVDYrm:  [ 0.00  0.00 ]
+Key: VPSRLVDYrr:  [ 0.00  0.00 ]
+Key: VPSRLVDZ:  [ 0.00  0.00 ]
+Key: VPSRLVDZrm:  [ 0.00  0.00 ]
+Key: VPSRLVDZrmb:  [ 0.00  0.00 ]
+Key: VPSRLVDZrmbk:  [ 0.00  0.00 ]
+Key: VPSRLVDZrmbkz:  [ 0.00  0.00 ]
+Key: VPSRLVDZrmk:  [ 0.00  0.00 ]
+Key: VPSRLVDZrmkz:  [ 0.00  0.00 ]
+Key: VPSRLVDZrr:  [ 0.00  0.00 ]
+Key: VPSRLVDZrrk:  [ 0.00  0.00 ]
+Key: VPSRLVDZrrkz:  [ 0.00  0.00 ]
+Key: VPSRLVDrm:  [ 0.00  0.00 ]
+Key: VPSRLVDrr:  [ 0.00  0.00 ]
+Key: VPSRLVQYrm:  [ 0.00  0.00 ]
+Key: VPSRLVQYrr:  [ 0.00  0.00 ]
+Key: VPSRLVQZ:  [ 0.00  0.00 ]
+Key: VPSRLVQZrm:  [ 0.00  0.00 ]
+Key: VPSRLVQZrmb:  [ 0.00  0.00 ]
+Key: VPSRLVQZrmbk:  [ 0.00  0.00 ]
+Key: VPSRLVQZrmbkz:  [ 0.00  0.00 ]
+Key: VPSRLVQZrmk:  [ 0.00  0.00 ]
+Key: VPSRLVQZrmkz:  [ 0.00  0.00 ]
+Key: VPSRLVQZrr:  [ 0.00  0.00 ]
+Key: VPSRLVQZrrk:  [ 0.00  0.00 ]
+Key: VPSRLVQZrrkz:  [ 0.00  0.00 ]
+Key: VPSRLVQrm:  [ 0.00  0.00 ]
+Key: VPSRLVQrr:  [ 0.00  0.00 ]
+Key: VPSRLVWZ:  [ 0.00  0.00 ]
+Key: VPSRLVWZrm:  [ 0.00  0.00 ]
+Key: VPSRLVWZrmk:  [ 0.00  0.00 ]
+Key: VPSRLVWZrmkz:  [ 0.00  0.00 ]
+Key: VPSRLVWZrr:  [ 0.00  0.00 ]
+Key: VPSRLVWZrrk:  [ 0.00  0.00 ]
+Key: VPSRLVWZrrkz:  [ 0.00  0.00 ]
+Key: VPSRLWYri:  [ 0.00  0.00 ]
+Key: VPSRLWYrm:  [ 0.00  0.00 ]
+Key: VPSRLWYrr:  [ 0.00  0.00 ]
+Key: VPSRLWZ:  [ 0.00  0.00 ]
+Key: VPSRLWZmi:  [ 0.00  0.00 ]
+Key: VPSRLWZmik:  [ 0.00  0.00 ]
+Key: VPSRLWZmikz:  [ 0.00  0.00 ]
+Key: VPSRLWZri:  [ 0.00  0.00 ]
+Key: VPSRLWZrik:  [ 0.00  0.00 ]
+Key: VPSRLWZrikz:  [ 0.00  0.00 ]
+Key: VPSRLWZrm:  [ 0.00  0.00 ]
+Key: VPSRLWZrmk:  [ 0.00  0.00 ]
+Key: VPSRLWZrmkz:  [ 0.00  0.00 ]
+Key: VPSRLWZrr:  [ 0.00  0.00 ]
+Key: VPSRLWZrrk:  [ 0.00  0.00 ]
+Key: VPSRLWZrrkz:  [ 0.00  0.00 ]
+Key: VPSRLWri:  [ 0.00  0.00 ]
+Key: VPSRLWrm:  [ 0.00  0.00 ]
+Key: VPSRLWrr:  [ 0.00  0.00 ]
+Key: VPSUBBYrm:  [ 0.00  0.00 ]
+Key: VPSUBBYrr:  [ 0.00  0.00 ]
+Key: VPSUBBZ:  [ 0.00  0.00 ]
+Key: VPSUBBZrm:  [ 0.00  0.00 ]
+Key: VPSUBBZrmk:  [ 0.00  0.00 ]
+Key: VPSUBBZrmkz:  [ 0.00  0.00 ]
+Key: VPSUBBZrr:  [ 0.00  0.00 ]
+Key: VPSUBBZrrk:  [ 0.00  0.00 ]
+Key: VPSUBBZrrkz:  [ 0.00  0.00 ]
+Key: VPSUBBrm:  [ 0.00  0.00 ]
+Key: VPSUBBrr:  [ 0.00  0.00 ]
+Key: VPSUBDYrm:  [ 0.00  0.00 ]
+Key: VPSUBDYrr:  [ 0.00  0.00 ]
+Key: VPSUBDZ:  [ 0.00  0.00 ]
+Key: VPSUBDZrm:  [ 0.00  0.00 ]
+Key: VPSUBDZrmb:  [ 0.00  0.00 ]
+Key: VPSUBDZrmbk:  [ 0.00  0.00 ]
+Key: VPSUBDZrmbkz:  [ 0.00  0.00 ]
+Key: VPSUBDZrmk:  [ 0.00  0.00 ]
+Key: VPSUBDZrmkz:  [ 0.00  0.00 ]
+Key: VPSUBDZrr:  [ 0.00  0.00 ]
+Key: VPSUBDZrrk:  [ 0.00  0.00 ]
+Key: VPSUBDZrrkz:  [ 0.00  0.00 ]
+Key: VPSUBDrm:  [ 0.00  0.00 ]
+Key: VPSUBDrr:  [ 0.00  0.00 ]
+Key: VPSUBQYrm:  [ 0.00  0.00 ]
+Key: VPSUBQYrr:  [ 0.00  0.00 ]
+Key: VPSUBQZ:  [ 0.00  0.00 ]
+Key: VPSUBQZrm:  [ 0.00  0.00 ]
+Key: VPSUBQZrmb:  [ 0.00  0.00 ]
+Key: VPSUBQZrmbk:  [ 0.00  0.00 ]
+Key: VPSUBQZrmbkz:  [ 0.00  0.00 ]
+Key: VPSUBQZrmk:  [ 0.00  0.00 ]
+Key: VPSUBQZrmkz:  [ 0.00  0.00 ]
+Key: VPSUBQZrr:  [ 0.00  0.00 ]
+Key: VPSUBQZrrk:  [ 0.00  0.00 ]
+Key: VPSUBQZrrkz:  [ 0.00  0.00 ]
+Key: VPSUBQrm:  [ 0.00  0.00 ]
+Key: VPSUBQrr:  [ 0.00  0.00 ]
+Key: VPSUBSBYrm:  [ 0.00  0.00 ]
+Key: VPSUBSBYrr:  [ 0.00  0.00 ]
+Key: VPSUBSBZ:  [ 0.00  0.00 ]
+Key: VPSUBSBZrm:  [ 0.00  0.00 ]
+Key: VPSUBSBZrmk:  [ 0.00  0.00 ]
+Key: VPSUBSBZrmkz:  [ 0.00  0.00 ]
+Key: VPSUBSBZrr:  [ 0.00  0.00 ]
+Key: VPSUBSBZrrk:  [ 0.00  0.00 ]
+Key: VPSUBSBZrrkz:  [ 0.00  0.00 ]
+Key: VPSUBSBrm:  [ 0.00  0.00 ]
+Key: VPSUBSBrr:  [ 0.00  0.00 ]
+Key: VPSUBSWYrm:  [ 0.00  0.00 ]
+Key: VPSUBSWYrr:  [ 0.00  0.00 ]
+Key: VPSUBSWZ:  [ 0.00  0.00 ]
+Key: VPSUBSWZrm:  [ 0.00  0.00 ]
+Key: VPSUBSWZrmk:  [ 0.00  0.00 ]
+Key: VPSUBSWZrmkz:  [ 0.00  0.00 ]
+Key: VPSUBSWZrr:  [ 0.00  0.00 ]
+Key: VPSUBSWZrrk:  [ 0.00  0.00 ]
+Key: VPSUBSWZrrkz:  [ 0.00  0.00 ]
+Key: VPSUBSWrm:  [ 0.00  0.00 ]
+Key: VPSUBSWrr:  [ 0.00  0.00 ]
+Key: VPSUBUSBYrm:  [ 0.00  0.00 ]
+Key: VPSUBUSBYrr:  [ 0.00  0.00 ]
+Key: VPSUBUSBZ:  [ 0.00  0.00 ]
+Key: VPSUBUSBZrm:  [ 0.00  0.00 ]
+Key: VPSUBUSBZrmk:  [ 0.00  0.00 ]
+Key: VPSUBUSBZrmkz:  [ 0.00  0.00 ]
+Key: VPSUBUSBZrr:  [ 0.00  0.00 ]
+Key: VPSUBUSBZrrk:  [ 0.00  0.00 ]
+Key: VPSUBUSBZrrkz:  [ 0.00  0.00 ]
+Key: VPSUBUSBrm:  [ 0.00  0.00 ]
+Key: VPSUBUSBrr:  [ 0.00  0.00 ]
+Key: VPSUBUSWYrm:  [ 0.00  0.00 ]
+Key: VPSUBUSWYrr:  [ 0.00  0.00 ]
+Key: VPSUBUSWZ:  [ 0.00  0.00 ]
+Key: VPSUBUSWZrm:  [ 0.00  0.00 ]
+Key: VPSUBUSWZrmk:  [ 0.00  0.00 ]
+Key: VPSUBUSWZrmkz:  [ 0.00  0.00 ]
+Key: VPSUBUSWZrr:  [ 0.00  0.00 ]
+Key: VPSUBUSWZrrk:  [ 0.00  0.00 ]
+Key: VPSUBUSWZrrkz:  [ 0.00  0.00 ]
+Key: VPSUBUSWrm:  [ 0.00  0.00 ]
+Key: VPSUBUSWrr:  [ 0.00  0.00 ]
+Key: VPSUBWYrm:  [ 0.00  0.00 ]
+Key: VPSUBWYrr:  [ 0.00  0.00 ]
+Key: VPSUBWZ:  [ 0.00  0.00 ]
+Key: VPSUBWZrm:  [ 0.00  0.00 ]
+Key: VPSUBWZrmk:  [ 0.00  0.00 ]
+Key: VPSUBWZrmkz:  [ 0.00  0.00 ]
+Key: VPSUBWZrr:  [ 0.00  0.00 ]
+Key: VPSUBWZrrk:  [ 0.00  0.00 ]
+Key: VPSUBWZrrkz:  [ 0.00  0.00 ]
+Key: VPSUBWrm:  [ 0.00  0.00 ]
+Key: VPSUBWrr:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZ:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrmbi:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrmbik:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrmbikz:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrmi:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrmik:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrmikz:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrri:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrrik:  [ 0.00  0.00 ]
+Key: VPTERNLOGDZrrikz:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZ:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrmbi:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrmbik:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrmbikz:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrmi:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrmik:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrmikz:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrri:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrrik:  [ 0.00  0.00 ]
+Key: VPTERNLOGQZrrikz:  [ 0.00  0.00 ]
+Key: VPTESTMBZ:  [ 0.00  0.00 ]
+Key: VPTESTMBZrm:  [ 0.00  0.00 ]
+Key: VPTESTMBZrmk:  [ 0.00  0.00 ]
+Key: VPTESTMBZrr:  [ 0.00  0.00 ]
+Key: VPTESTMBZrrk:  [ 0.00  0.00 ]
+Key: VPTESTMDZ:  [ 0.00  0.00 ]
+Key: VPTESTMDZrm:  [ 0.00  0.00 ]
+Key: VPTESTMDZrmb:  [ 0.00  0.00 ]
+Key: VPTESTMDZrmbk:  [ 0.00  0.00 ]
+Key: VPTESTMDZrmk:  [ 0.00  0.00 ]
+Key: VPTESTMDZrr:  [ 0.00  0.00 ]
+Key: VPTESTMDZrrk:  [ 0.00  0.00 ]
+Key: VPTESTMQZ:  [ 0.00  0.00 ]
+Key: VPTESTMQZrm:  [ 0.00  0.00 ]
+Key: VPTESTMQZrmb:  [ 0.00  0.00 ]
+Key: VPTESTMQZrmbk:  [ 0.00  0.00 ]
+Key: VPTESTMQZrmk:  [ 0.00  0.00 ]
+Key: VPTESTMQZrr:  [ 0.00  0.00 ]
+Key: VPTESTMQZrrk:  [ 0.00  0.00 ]
+Key: VPTESTMWZ:  [ 0.00  0.00 ]
+Key: VPTESTMWZrm:  [ 0.00  0.00 ]
+Key: VPTESTMWZrmk:  [ 0.00  0.00 ]
+Key: VPTESTMWZrr:  [ 0.00  0.00 ]
+Key: VPTESTMWZrrk:  [ 0.00  0.00 ]
+Key: VPTESTNMBZ:  [ 0.00  0.00 ]
+Key: VPTESTNMBZrm:  [ 0.00  0.00 ]
+Key: VPTESTNMBZrmk:  [ 0.00  0.00 ]
+Key: VPTESTNMBZrr:  [ 0.00  0.00 ]
+Key: VPTESTNMBZrrk:  [ 0.00  0.00 ]
+Key: VPTESTNMDZ:  [ 0.00  0.00 ]
+Key: VPTESTNMDZrm:  [ 0.00  0.00 ]
+Key: VPTESTNMDZrmb:  [ 0.00  0.00 ]
+Key: VPTESTNMDZrmbk:  [ 0.00  0.00 ]
+Key: VPTESTNMDZrmk:  [ 0.00  0.00 ]
+Key: VPTESTNMDZrr:  [ 0.00  0.00 ]
+Key: VPTESTNMDZrrk:  [ 0.00  0.00 ]
+Key: VPTESTNMQZ:  [ 0.00  0.00 ]
+Key: VPTESTNMQZrm:  [ 0.00  0.00 ]
+Key: VPTESTNMQZrmb:  [ 0.00  0.00 ]
+Key: VPTESTNMQZrmbk:  [ 0.00  0.00 ]
+Key: VPTESTNMQZrmk:  [ 0.00  0.00 ]
+Key: VPTESTNMQZrr:  [ 0.00  0.00 ]
+Key: VPTESTNMQZrrk:  [ 0.00  0.00 ]
+Key: VPTESTNMWZ:  [ 0.00  0.00 ]
+Key: VPTESTNMWZrm:  [ 0.00  0.00 ]
+Key: VPTESTNMWZrmk:  [ 0.00  0.00 ]
+Key: VPTESTNMWZrr:  [ 0.00  0.00 ]
+Key: VPTESTNMWZrrk:  [ 0.00  0.00 ]
+Key: VPTESTYrm:  [ 0.00  0.00 ]
+Key: VPTESTYrr:  [ 0.00  0.00 ]
+Key: VPTESTrm:  [ 0.00  0.00 ]
+Key: VPTESTrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWYrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWYrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWZ:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWZrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWZrmk:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWZrmkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWZrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWZrrk:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWZrrkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHBWrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQYrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQYrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZ:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrmb:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrmbk:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrmbkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrmk:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrmkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrrk:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQZrrkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHDQrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQYrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQYrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZ:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrmb:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrmbk:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrmbkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrmk:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrmkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrrk:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQZrrkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHQDQrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDYrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDYrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDZ:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDZrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDZrmk:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDZrmkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDZrr:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDZrrk:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDZrrkz:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDrm:  [ 0.00  0.00 ]
+Key: VPUNPCKHWDrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWYrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWYrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWZ:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWZrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWZrmk:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWZrmkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWZrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWZrrk:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWZrrkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLBWrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQYrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQYrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZ:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrmb:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrmbk:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrmbkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrmk:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrmkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrrk:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQZrrkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLDQrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQYrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQYrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZ:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrmb:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrmbk:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrmbkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrmk:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrmkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrrk:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQZrrkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLQDQrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDYrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDYrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDZ:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDZrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDZrmk:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDZrmkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDZrr:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDZrrk:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDZrrkz:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDrm:  [ 0.00  0.00 ]
+Key: VPUNPCKLWDrr:  [ 0.00  0.00 ]
+Key: VPXORDZ:  [ 0.00  0.00 ]
+Key: VPXORDZrm:  [ 0.00  0.00 ]
+Key: VPXORDZrmb:  [ 0.00  0.00 ]
+Key: VPXORDZrmbk:  [ 0.00  0.00 ]
+Key: VPXORDZrmbkz:  [ 0.00  0.00 ]
+Key: VPXORDZrmk:  [ 0.00  0.00 ]
+Key: VPXORDZrmkz:  [ 0.00  0.00 ]
+Key: VPXORDZrr:  [ 0.00  0.00 ]
+Key: VPXORDZrrk:  [ 0.00  0.00 ]
+Key: VPXORDZrrkz:  [ 0.00  0.00 ]
+Key: VPXORQZ:  [ 0.00  0.00 ]
+Key: VPXORQZrm:  [ 0.00  0.00 ]
+Key: VPXORQZrmb:  [ 0.00  0.00 ]
+Key: VPXORQZrmbk:  [ 0.00  0.00 ]
+Key: VPXORQZrmbkz:  [ 0.00  0.00 ]
+Key: VPXORQZrmk:  [ 0.00  0.00 ]
+Key: VPXORQZrmkz:  [ 0.00  0.00 ]
+Key: VPXORQZrr:  [ 0.00  0.00 ]
+Key: VPXORQZrrk:  [ 0.00  0.00 ]
+Key: VPXORQZrrkz:  [ 0.00  0.00 ]
+Key: VPXORYrm:  [ 0.00  0.00 ]
+Key: VPXORYrr:  [ 0.00  0.00 ]
+Key: VPXORrm:  [ 0.00  0.00 ]
+Key: VPXORrr:  [ 0.00  0.00 ]
+Key: VRANGEPDZ:  [ 0.00  0.00 ]
+Key: VRANGEPDZrmbi:  [ 0.00  0.00 ]
+Key: VRANGEPDZrmbik:  [ 0.00  0.00 ]
+Key: VRANGEPDZrmbikz:  [ 0.00  0.00 ]
+Key: VRANGEPDZrmi:  [ 0.00  0.00 ]
+Key: VRANGEPDZrmik:  [ 0.00  0.00 ]
+Key: VRANGEPDZrmikz:  [ 0.00  0.00 ]
+Key: VRANGEPDZrri:  [ 0.00  0.00 ]
+Key: VRANGEPDZrrib:  [ 0.00  0.00 ]
+Key: VRANGEPDZrribk:  [ 0.00  0.00 ]
+Key: VRANGEPDZrribkz:  [ 0.00  0.00 ]
+Key: VRANGEPDZrrik:  [ 0.00  0.00 ]
+Key: VRANGEPDZrrikz:  [ 0.00  0.00 ]
+Key: VRANGEPSZ:  [ 0.00  0.00 ]
+Key: VRANGEPSZrmbi:  [ 0.00  0.00 ]
+Key: VRANGEPSZrmbik:  [ 0.00  0.00 ]
+Key: VRANGEPSZrmbikz:  [ 0.00  0.00 ]
+Key: VRANGEPSZrmi:  [ 0.00  0.00 ]
+Key: VRANGEPSZrmik:  [ 0.00  0.00 ]
+Key: VRANGEPSZrmikz:  [ 0.00  0.00 ]
+Key: VRANGEPSZrri:  [ 0.00  0.00 ]
+Key: VRANGEPSZrrib:  [ 0.00  0.00 ]
+Key: VRANGEPSZrribk:  [ 0.00  0.00 ]
+Key: VRANGEPSZrribkz:  [ 0.00  0.00 ]
+Key: VRANGEPSZrrik:  [ 0.00  0.00 ]
+Key: VRANGEPSZrrikz:  [ 0.00  0.00 ]
+Key: VRANGESDZrmi:  [ 0.00  0.00 ]
+Key: VRANGESDZrmik:  [ 0.00  0.00 ]
+Key: VRANGESDZrmikz:  [ 0.00  0.00 ]
+Key: VRANGESDZrri:  [ 0.00  0.00 ]
+Key: VRANGESDZrrib:  [ 0.00  0.00 ]
+Key: VRANGESDZrribk:  [ 0.00  0.00 ]
+Key: VRANGESDZrribkz:  [ 0.00  0.00 ]
+Key: VRANGESDZrrik:  [ 0.00  0.00 ]
+Key: VRANGESDZrrikz:  [ 0.00  0.00 ]
+Key: VRANGESSZrmi:  [ 0.00  0.00 ]
+Key: VRANGESSZrmik:  [ 0.00  0.00 ]
+Key: VRANGESSZrmikz:  [ 0.00  0.00 ]
+Key: VRANGESSZrri:  [ 0.00  0.00 ]
+Key: VRANGESSZrrib:  [ 0.00  0.00 ]
+Key: VRANGESSZrribk:  [ 0.00  0.00 ]
+Key: VRANGESSZrribkz:  [ 0.00  0.00 ]
+Key: VRANGESSZrrik:  [ 0.00  0.00 ]
+Key: VRANGESSZrrikz:  [ 0.00  0.00 ]
+Key: VRCP:  [ 0.00  0.00 ]
+Key: VRCPBF:  [ 0.00  0.00 ]
+Key: VRCPPHZ:  [ 0.00  0.00 ]
+Key: VRCPPHZm:  [ 0.00  0.00 ]
+Key: VRCPPHZmb:  [ 0.00  0.00 ]
+Key: VRCPPHZmbk:  [ 0.00  0.00 ]
+Key: VRCPPHZmbkz:  [ 0.00  0.00 ]
+Key: VRCPPHZmk:  [ 0.00  0.00 ]
+Key: VRCPPHZmkz:  [ 0.00  0.00 ]
+Key: VRCPPHZr:  [ 0.00  0.00 ]
+Key: VRCPPHZrk:  [ 0.00  0.00 ]
+Key: VRCPPHZrkz:  [ 0.00  0.00 ]
+Key: VRCPPSYm:  [ 0.00  0.00 ]
+Key: VRCPPSYr:  [ 0.00  0.00 ]
+Key: VRCPPSm:  [ 0.00  0.00 ]
+Key: VRCPPSr:  [ 0.00  0.00 ]
+Key: VRCPSHZrm:  [ 0.00  0.00 ]
+Key: VRCPSHZrmk:  [ 0.00  0.00 ]
+Key: VRCPSHZrmkz:  [ 0.00  0.00 ]
+Key: VRCPSHZrr:  [ 0.00  0.00 ]
+Key: VRCPSHZrrk:  [ 0.00  0.00 ]
+Key: VRCPSHZrrkz:  [ 0.00  0.00 ]
+Key: VRCPSSm:  [ 0.00  0.00 ]
+Key: VRCPSSm_Int:  [ 0.00  0.00 ]
+Key: VRCPSSr:  [ 0.00  0.00 ]
+Key: VRCPSSr_Int:  [ 0.00  0.00 ]
+Key: VREDUCEBF:  [ 0.00  0.00 ]
+Key: VREDUCEPDZ:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrmbi:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrmbik:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrmbikz:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrmi:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrmik:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrmikz:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrri:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrrib:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrribk:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrribkz:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrrik:  [ 0.00  0.00 ]
+Key: VREDUCEPDZrrikz:  [ 0.00  0.00 ]
+Key: VREDUCEPHZ:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrmbi:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrmbik:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrmbikz:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrmi:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrmik:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrmikz:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrri:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrrib:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrribk:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrribkz:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrrik:  [ 0.00  0.00 ]
+Key: VREDUCEPHZrrikz:  [ 0.00  0.00 ]
+Key: VREDUCEPSZ:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrmbi:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrmbik:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrmbikz:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrmi:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrmik:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrmikz:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrri:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrrib:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrribk:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrribkz:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrrik:  [ 0.00  0.00 ]
+Key: VREDUCEPSZrrikz:  [ 0.00  0.00 ]
+Key: VREDUCESDZrmi:  [ 0.00  0.00 ]
+Key: VREDUCESDZrmik:  [ 0.00  0.00 ]
+Key: VREDUCESDZrmikz:  [ 0.00  0.00 ]
+Key: VREDUCESDZrri:  [ 0.00  0.00 ]
+Key: VREDUCESDZrrib:  [ 0.00  0.00 ]
+Key: VREDUCESDZrribk:  [ 0.00  0.00 ]
+Key: VREDUCESDZrribkz:  [ 0.00  0.00 ]
+Key: VREDUCESDZrrik:  [ 0.00  0.00 ]
+Key: VREDUCESDZrrikz:  [ 0.00  0.00 ]
+Key: VREDUCESHZrmi:  [ 0.00  0.00 ]
+Key: VREDUCESHZrmik:  [ 0.00  0.00 ]
+Key: VREDUCESHZrmikz:  [ 0.00  0.00 ]
+Key: VREDUCESHZrri:  [ 0.00  0.00 ]
+Key: VREDUCESHZrrib:  [ 0.00  0.00 ]
+Key: VREDUCESHZrribk:  [ 0.00  0.00 ]
+Key: VREDUCESHZrribkz:  [ 0.00  0.00 ]
+Key: VREDUCESHZrrik:  [ 0.00  0.00 ]
+Key: VREDUCESHZrrikz:  [ 0.00  0.00 ]
+Key: VREDUCESSZrmi:  [ 0.00  0.00 ]
+Key: VREDUCESSZrmik:  [ 0.00  0.00 ]
+Key: VREDUCESSZrmikz:  [ 0.00  0.00 ]
+Key: VREDUCESSZrri:  [ 0.00  0.00 ]
+Key: VREDUCESSZrrib:  [ 0.00  0.00 ]
+Key: VREDUCESSZrribk:  [ 0.00  0.00 ]
+Key: VREDUCESSZrribkz:  [ 0.00  0.00 ]
+Key: VREDUCESSZrrik:  [ 0.00  0.00 ]
+Key: VREDUCESSZrrikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEBF:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZ:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrmbi:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrmbik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrmbikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrmi:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrmik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrmikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrri:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrrib:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrribk:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrribkz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrrik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPDZrrikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZ:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrmbi:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrmbik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrmbikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrmi:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrmik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrmikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrri:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrrib:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrribk:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrribkz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrrik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPHZrrikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZ:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrmbi:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrmbik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrmbikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrmi:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrmik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrmikz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrri:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrrib:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrribk:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrribkz:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrrik:  [ 0.00  0.00 ]
+Key: VRNDSCALEPSZrrikz:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrmi:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrmi_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrmik_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrmikz_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrri:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrri_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrrib_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrribk_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrribkz_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrrik_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESDZrrikz_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrmi:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrmi_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrmik_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrmikz_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrri:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrri_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrrib_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrribk_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrribkz_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrrik_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESHZrrikz_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrmi:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrmi_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrmik_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrmikz_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrri:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrri_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrrib_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrribk_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrribkz_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrrik_Int:  [ 0.00  0.00 ]
+Key: VRNDSCALESSZrrikz_Int:  [ 0.00  0.00 ]
+Key: VROUNDPDYmi:  [ 0.00  0.00 ]
+Key: VROUNDPDYri:  [ 0.00  0.00 ]
+Key: VROUNDPDmi:  [ 0.00  0.00 ]
+Key: VROUNDPDri:  [ 0.00  0.00 ]
+Key: VROUNDPSYmi:  [ 0.00  0.00 ]
+Key: VROUNDPSYri:  [ 0.00  0.00 ]
+Key: VROUNDPSmi:  [ 0.00  0.00 ]
+Key: VROUNDPSri:  [ 0.00  0.00 ]
+Key: VROUNDSDmi:  [ 0.00  0.00 ]
+Key: VROUNDSDmi_Int:  [ 0.00  0.00 ]
+Key: VROUNDSDri:  [ 0.00  0.00 ]
+Key: VROUNDSDri_Int:  [ 0.00  0.00 ]
+Key: VROUNDSSmi:  [ 0.00  0.00 ]
+Key: VROUNDSSmi_Int:  [ 0.00  0.00 ]
+Key: VROUNDSSri:  [ 0.00  0.00 ]
+Key: VROUNDSSri_Int:  [ 0.00  0.00 ]
+Key: VRSQRT:  [ 0.00  0.00 ]
+Key: VRSQRTBF:  [ 0.00  0.00 ]
+Key: VRSQRTPHZ:  [ 0.00  0.00 ]
+Key: VRSQRTPHZm:  [ 0.00  0.00 ]
+Key: VRSQRTPHZmb:  [ 0.00  0.00 ]
+Key: VRSQRTPHZmbk:  [ 0.00  0.00 ]
+Key: VRSQRTPHZmbkz:  [ 0.00  0.00 ]
+Key: VRSQRTPHZmk:  [ 0.00  0.00 ]
+Key: VRSQRTPHZmkz:  [ 0.00  0.00 ]
+Key: VRSQRTPHZr:  [ 0.00  0.00 ]
+Key: VRSQRTPHZrk:  [ 0.00  0.00 ]
+Key: VRSQRTPHZrkz:  [ 0.00  0.00 ]
+Key: VRSQRTPSYm:  [ 0.00  0.00 ]
+Key: VRSQRTPSYr:  [ 0.00  0.00 ]
+Key: VRSQRTPSm:  [ 0.00  0.00 ]
+Key: VRSQRTPSr:  [ 0.00  0.00 ]
+Key: VRSQRTSHZrm:  [ 0.00  0.00 ]
+Key: VRSQRTSHZrmk:  [ 0.00  0.00 ]
+Key: VRSQRTSHZrmkz:  [ 0.00  0.00 ]
+Key: VRSQRTSHZrr:  [ 0.00  0.00 ]
+Key: VRSQRTSHZrrk:  [ 0.00  0.00 ]
+Key: VRSQRTSHZrrkz:  [ 0.00  0.00 ]
+Key: VRSQRTSSm:  [ 0.00  0.00 ]
+Key: VRSQRTSSm_Int:  [ 0.00  0.00 ]
+Key: VRSQRTSSr:  [ 0.00  0.00 ]
+Key: VRSQRTSSr_Int:  [ 0.00  0.00 ]
+Key: VSCALEFBF:  [ 0.00  0.00 ]
+Key: VSCALEFPDZ:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrm:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrmb:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrmbk:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrmbkz:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrmk:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrmkz:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrr:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrrb:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrrbk:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrrbkz:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrrk:  [ 0.00  0.00 ]
+Key: VSCALEFPDZrrkz:  [ 0.00  0.00 ]
+Key: VSCALEFPHZ:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrm:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrmb:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrmbk:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrmbkz:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrmk:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrmkz:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrr:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrrb:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrrbk:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrrbkz:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrrk:  [ 0.00  0.00 ]
+Key: VSCALEFPHZrrkz:  [ 0.00  0.00 ]
+Key: VSCALEFPSZ:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrm:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrmb:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrmbk:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrmbkz:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrmk:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrmkz:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrr:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrrb:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrrbk:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrrbkz:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrrk:  [ 0.00  0.00 ]
+Key: VSCALEFPSZrrkz:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrm:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrmk:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrmkz:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrr:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrrbk_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrrk:  [ 0.00  0.00 ]
+Key: VSCALEFSDZrrkz:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrm:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrmk:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrmkz:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrr:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrrbk_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrrk:  [ 0.00  0.00 ]
+Key: VSCALEFSHZrrkz:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrm:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrmk:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrmkz:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrr:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrrbk_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrrk:  [ 0.00  0.00 ]
+Key: VSCALEFSSZrrkz:  [ 0.00  0.00 ]
+Key: VSCATTERDPDZ:  [ 0.00  0.00 ]
+Key: VSCATTERDPDZmr:  [ 0.00  0.00 ]
+Key: VSCATTERDPSZ:  [ 0.00  0.00 ]
+Key: VSCATTERDPSZmr:  [ 0.00  0.00 ]
+Key: VSCATTERPF:  [ 0.00  0.00 ]
+Key: VSCATTERQPDZ:  [ 0.00  0.00 ]
+Key: VSCATTERQPDZmr:  [ 0.00  0.00 ]
+Key: VSCATTERQPSZ:  [ 0.00  0.00 ]
+Key: VSCATTERQPSZmr:  [ 0.00  0.00 ]
+Key: VSHA:  [ 0.00  0.00 ]
+Key: VSHUFF:  [ 0.00  0.00 ]
+Key: VSHUFI:  [ 0.00  0.00 ]
+Key: VSHUFPDYrmi:  [ 0.00  0.00 ]
+Key: VSHUFPDYrri:  [ 0.00  0.00 ]
+Key: VSHUFPDZ:  [ 0.00  0.00 ]
+Key: VSHUFPDZrmbi:  [ 0.00  0.00 ]
+Key: VSHUFPDZrmbik:  [ 0.00  0.00 ]
+Key: VSHUFPDZrmbikz:  [ 0.00  0.00 ]
+Key: VSHUFPDZrmi:  [ 0.00  0.00 ]
+Key: VSHUFPDZrmik:  [ 0.00  0.00 ]
+Key: VSHUFPDZrmikz:  [ 0.00  0.00 ]
+Key: VSHUFPDZrri:  [ 0.00  0.00 ]
+Key: VSHUFPDZrrik:  [ 0.00  0.00 ]
+Key: VSHUFPDZrrikz:  [ 0.00  0.00 ]
+Key: VSHUFPDrmi:  [ 0.00  0.00 ]
+Key: VSHUFPDrri:  [ 0.00  0.00 ]
+Key: VSHUFPSYrmi:  [ 0.00  0.00 ]
+Key: VSHUFPSYrri:  [ 0.00  0.00 ]
+Key: VSHUFPSZ:  [ 0.00  0.00 ]
+Key: VSHUFPSZrmbi:  [ 0.00  0.00 ]
+Key: VSHUFPSZrmbik:  [ 0.00  0.00 ]
+Key: VSHUFPSZrmbikz:  [ 0.00  0.00 ]
+Key: VSHUFPSZrmi:  [ 0.00  0.00 ]
+Key: VSHUFPSZrmik:  [ 0.00  0.00 ]
+Key: VSHUFPSZrmikz:  [ 0.00  0.00 ]
+Key: VSHUFPSZrri:  [ 0.00  0.00 ]
+Key: VSHUFPSZrrik:  [ 0.00  0.00 ]
+Key: VSHUFPSZrrikz:  [ 0.00  0.00 ]
+Key: VSHUFPSrmi:  [ 0.00  0.00 ]
+Key: VSHUFPSrri:  [ 0.00  0.00 ]
+Key: VSM:  [ 0.00  0.00 ]
+Key: VSQRTBF:  [ 0.00  0.00 ]
+Key: VSQRTPDYm:  [ 0.00  0.00 ]
+Key: VSQRTPDYr:  [ 0.00  0.00 ]
+Key: VSQRTPDZ:  [ 0.00  0.00 ]
+Key: VSQRTPDZm:  [ 0.00  0.00 ]
+Key: VSQRTPDZmb:  [ 0.00  0.00 ]
+Key: VSQRTPDZmbk:  [ 0.00  0.00 ]
+Key: VSQRTPDZmbkz:  [ 0.00  0.00 ]
+Key: VSQRTPDZmk:  [ 0.00  0.00 ]
+Key: VSQRTPDZmkz:  [ 0.00  0.00 ]
+Key: VSQRTPDZr:  [ 0.00  0.00 ]
+Key: VSQRTPDZrb:  [ 0.00  0.00 ]
+Key: VSQRTPDZrbk:  [ 0.00  0.00 ]
+Key: VSQRTPDZrbkz:  [ 0.00  0.00 ]
+Key: VSQRTPDZrk:  [ 0.00  0.00 ]
+Key: VSQRTPDZrkz:  [ 0.00  0.00 ]
+Key: VSQRTPDm:  [ 0.00  0.00 ]
+Key: VSQRTPDr:  [ 0.00  0.00 ]
+Key: VSQRTPHZ:  [ 0.00  0.00 ]
+Key: VSQRTPHZm:  [ 0.00  0.00 ]
+Key: VSQRTPHZmb:  [ 0.00  0.00 ]
+Key: VSQRTPHZmbk:  [ 0.00  0.00 ]
+Key: VSQRTPHZmbkz:  [ 0.00  0.00 ]
+Key: VSQRTPHZmk:  [ 0.00  0.00 ]
+Key: VSQRTPHZmkz:  [ 0.00  0.00 ]
+Key: VSQRTPHZr:  [ 0.00  0.00 ]
+Key: VSQRTPHZrb:  [ 0.00  0.00 ]
+Key: VSQRTPHZrbk:  [ 0.00  0.00 ]
+Key: VSQRTPHZrbkz:  [ 0.00  0.00 ]
+Key: VSQRTPHZrk:  [ 0.00  0.00 ]
+Key: VSQRTPHZrkz:  [ 0.00  0.00 ]
+Key: VSQRTPSYm:  [ 0.00  0.00 ]
+Key: VSQRTPSYr:  [ 0.00  0.00 ]
+Key: VSQRTPSZ:  [ 0.00  0.00 ]
+Key: VSQRTPSZm:  [ 0.00  0.00 ]
+Key: VSQRTPSZmb:  [ 0.00  0.00 ]
+Key: VSQRTPSZmbk:  [ 0.00  0.00 ]
+Key: VSQRTPSZmbkz:  [ 0.00  0.00 ]
+Key: VSQRTPSZmk:  [ 0.00  0.00 ]
+Key: VSQRTPSZmkz:  [ 0.00  0.00 ]
+Key: VSQRTPSZr:  [ 0.00  0.00 ]
+Key: VSQRTPSZrb:  [ 0.00  0.00 ]
+Key: VSQRTPSZrbk:  [ 0.00  0.00 ]
+Key: VSQRTPSZrbkz:  [ 0.00  0.00 ]
+Key: VSQRTPSZrk:  [ 0.00  0.00 ]
+Key: VSQRTPSZrkz:  [ 0.00  0.00 ]
+Key: VSQRTPSm:  [ 0.00  0.00 ]
+Key: VSQRTPSr:  [ 0.00  0.00 ]
+Key: VSQRTSDZm:  [ 0.00  0.00 ]
+Key: VSQRTSDZm_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDZmk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDZmkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDZr:  [ 0.00  0.00 ]
+Key: VSQRTSDZr_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDZrb_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDZrbk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDZrbkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDZrk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDZrkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDm:  [ 0.00  0.00 ]
+Key: VSQRTSDm_Int:  [ 0.00  0.00 ]
+Key: VSQRTSDr:  [ 0.00  0.00 ]
+Key: VSQRTSDr_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZm:  [ 0.00  0.00 ]
+Key: VSQRTSHZm_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZmk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZmkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZr:  [ 0.00  0.00 ]
+Key: VSQRTSHZr_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZrb_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZrbk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZrbkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZrk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSHZrkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZm:  [ 0.00  0.00 ]
+Key: VSQRTSSZm_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZmk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZmkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZr:  [ 0.00  0.00 ]
+Key: VSQRTSSZr_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZrb_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZrbk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZrbkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZrk_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSZrkz_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSm:  [ 0.00  0.00 ]
+Key: VSQRTSSm_Int:  [ 0.00  0.00 ]
+Key: VSQRTSSr:  [ 0.00  0.00 ]
+Key: VSQRTSSr_Int:  [ 0.00  0.00 ]
+Key: VSTMXCSR:  [ 0.00  0.00 ]
+Key: VSUBBF:  [ 0.00  0.00 ]
+Key: VSUBPDYrm:  [ 0.00  0.00 ]
+Key: VSUBPDYrr:  [ 0.00  0.00 ]
+Key: VSUBPDZ:  [ 0.00  0.00 ]
+Key: VSUBPDZrm:  [ 0.00  0.00 ]
+Key: VSUBPDZrmb:  [ 0.00  0.00 ]
+Key: VSUBPDZrmbk:  [ 0.00  0.00 ]
+Key: VSUBPDZrmbkz:  [ 0.00  0.00 ]
+Key: VSUBPDZrmk:  [ 0.00  0.00 ]
+Key: VSUBPDZrmkz:  [ 0.00  0.00 ]
+Key: VSUBPDZrr:  [ 0.00  0.00 ]
+Key: VSUBPDZrrb:  [ 0.00  0.00 ]
+Key: VSUBPDZrrbk:  [ 0.00  0.00 ]
+Key: VSUBPDZrrbkz:  [ 0.00  0.00 ]
+Key: VSUBPDZrrk:  [ 0.00  0.00 ]
+Key: VSUBPDZrrkz:  [ 0.00  0.00 ]
+Key: VSUBPDrm:  [ 0.00  0.00 ]
+Key: VSUBPDrr:  [ 0.00  0.00 ]
+Key: VSUBPHZ:  [ 0.00  0.00 ]
+Key: VSUBPHZrm:  [ 0.00  0.00 ]
+Key: VSUBPHZrmb:  [ 0.00  0.00 ]
+Key: VSUBPHZrmbk:  [ 0.00  0.00 ]
+Key: VSUBPHZrmbkz:  [ 0.00  0.00 ]
+Key: VSUBPHZrmk:  [ 0.00  0.00 ]
+Key: VSUBPHZrmkz:  [ 0.00  0.00 ]
+Key: VSUBPHZrr:  [ 0.00  0.00 ]
+Key: VSUBPHZrrb:  [ 0.00  0.00 ]
+Key: VSUBPHZrrbk:  [ 0.00  0.00 ]
+Key: VSUBPHZrrbkz:  [ 0.00  0.00 ]
+Key: VSUBPHZrrk:  [ 0.00  0.00 ]
+Key: VSUBPHZrrkz:  [ 0.00  0.00 ]
+Key: VSUBPSYrm:  [ 0.00  0.00 ]
+Key: VSUBPSYrr:  [ 0.00  0.00 ]
+Key: VSUBPSZ:  [ 0.00  0.00 ]
+Key: VSUBPSZrm:  [ 0.00  0.00 ]
+Key: VSUBPSZrmb:  [ 0.00  0.00 ]
+Key: VSUBPSZrmbk:  [ 0.00  0.00 ]
+Key: VSUBPSZrmbkz:  [ 0.00  0.00 ]
+Key: VSUBPSZrmk:  [ 0.00  0.00 ]
+Key: VSUBPSZrmkz:  [ 0.00  0.00 ]
+Key: VSUBPSZrr:  [ 0.00  0.00 ]
+Key: VSUBPSZrrb:  [ 0.00  0.00 ]
+Key: VSUBPSZrrbk:  [ 0.00  0.00 ]
+Key: VSUBPSZrrbkz:  [ 0.00  0.00 ]
+Key: VSUBPSZrrk:  [ 0.00  0.00 ]
+Key: VSUBPSZrrkz:  [ 0.00  0.00 ]
+Key: VSUBPSrm:  [ 0.00  0.00 ]
+Key: VSUBPSrr:  [ 0.00  0.00 ]
+Key: VSUBSDZrm:  [ 0.00  0.00 ]
+Key: VSUBSDZrm_Int:  [ 0.00  0.00 ]
+Key: VSUBSDZrmk_Int:  [ 0.00  0.00 ]
+Key: VSUBSDZrmkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSDZrr:  [ 0.00  0.00 ]
+Key: VSUBSDZrr_Int:  [ 0.00  0.00 ]
+Key: VSUBSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VSUBSDZrrbk_Int:  [ 0.00  0.00 ]
+Key: VSUBSDZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSDZrrk_Int:  [ 0.00  0.00 ]
+Key: VSUBSDZrrkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSDrm:  [ 0.00  0.00 ]
+Key: VSUBSDrm_Int:  [ 0.00  0.00 ]
+Key: VSUBSDrr:  [ 0.00  0.00 ]
+Key: VSUBSDrr_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrm:  [ 0.00  0.00 ]
+Key: VSUBSHZrm_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrmk_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrmkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrr:  [ 0.00  0.00 ]
+Key: VSUBSHZrr_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrrbk_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrrk_Int:  [ 0.00  0.00 ]
+Key: VSUBSHZrrkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrm:  [ 0.00  0.00 ]
+Key: VSUBSSZrm_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrmk_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrmkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrr:  [ 0.00  0.00 ]
+Key: VSUBSSZrr_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrrbk_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrrbkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrrk_Int:  [ 0.00  0.00 ]
+Key: VSUBSSZrrkz_Int:  [ 0.00  0.00 ]
+Key: VSUBSSrm:  [ 0.00  0.00 ]
+Key: VSUBSSrm_Int:  [ 0.00  0.00 ]
+Key: VSUBSSrr:  [ 0.00  0.00 ]
+Key: VSUBSSrr_Int:  [ 0.00  0.00 ]
+Key: VTESTPDYrm:  [ 0.00  0.00 ]
+Key: VTESTPDYrr:  [ 0.00  0.00 ]
+Key: VTESTPDrm:  [ 0.00  0.00 ]
+Key: VTESTPDrr:  [ 0.00  0.00 ]
+Key: VTESTPSYrm:  [ 0.00  0.00 ]
+Key: VTESTPSYrr:  [ 0.00  0.00 ]
+Key: VTESTPSrm:  [ 0.00  0.00 ]
+Key: VTESTPSrr:  [ 0.00  0.00 ]
+Key: VUCOMISDZrm:  [ 0.00  0.00 ]
+Key: VUCOMISDZrm_Int:  [ 0.00  0.00 ]
+Key: VUCOMISDZrr:  [ 0.00  0.00 ]
+Key: VUCOMISDZrr_Int:  [ 0.00  0.00 ]
+Key: VUCOMISDZrrb:  [ 0.00  0.00 ]
+Key: VUCOMISDrm:  [ 0.00  0.00 ]
+Key: VUCOMISDrm_Int:  [ 0.00  0.00 ]
+Key: VUCOMISDrr:  [ 0.00  0.00 ]
+Key: VUCOMISDrr_Int:  [ 0.00  0.00 ]
+Key: VUCOMISHZrm:  [ 0.00  0.00 ]
+Key: VUCOMISHZrm_Int:  [ 0.00  0.00 ]
+Key: VUCOMISHZrr:  [ 0.00  0.00 ]
+Key: VUCOMISHZrr_Int:  [ 0.00  0.00 ]
+Key: VUCOMISHZrrb:  [ 0.00  0.00 ]
+Key: VUCOMISSZrm:  [ 0.00  0.00 ]
+Key: VUCOMISSZrm_Int:  [ 0.00  0.00 ]
+Key: VUCOMISSZrr:  [ 0.00  0.00 ]
+Key: VUCOMISSZrr_Int:  [ 0.00  0.00 ]
+Key: VUCOMISSZrrb:  [ 0.00  0.00 ]
+Key: VUCOMISSrm:  [ 0.00  0.00 ]
+Key: VUCOMISSrm_Int:  [ 0.00  0.00 ]
+Key: VUCOMISSrr:  [ 0.00  0.00 ]
+Key: VUCOMISSrr_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSDZrm:  [ 0.00  0.00 ]
+Key: VUCOMXSDZrm_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSDZrr:  [ 0.00  0.00 ]
+Key: VUCOMXSDZrr_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSDZrrb_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSHZrm:  [ 0.00  0.00 ]
+Key: VUCOMXSHZrm_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSHZrr:  [ 0.00  0.00 ]
+Key: VUCOMXSHZrr_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSHZrrb_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSSZrm:  [ 0.00  0.00 ]
+Key: VUCOMXSSZrm_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSSZrr:  [ 0.00  0.00 ]
+Key: VUCOMXSSZrr_Int:  [ 0.00  0.00 ]
+Key: VUCOMXSSZrrb_Int:  [ 0.00  0.00 ]
+Key: VUNPCKHPDYrm:  [ 0.00  0.00 ]
+Key: VUNPCKHPDYrr:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZ:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrm:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrmb:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrmbk:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrmbkz:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrmk:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrmkz:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrr:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrrk:  [ 0.00  0.00 ]
+Key: VUNPCKHPDZrrkz:  [ 0.00  0.00 ]
+Key: VUNPCKHPDrm:  [ 0.00  0.00 ]
+Key: VUNPCKHPDrr:  [ 0.00  0.00 ]
+Key: VUNPCKHPSYrm:  [ 0.00  0.00 ]
+Key: VUNPCKHPSYrr:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZ:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrm:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrmb:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrmbk:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrmbkz:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrmk:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrmkz:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrr:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrrk:  [ 0.00  0.00 ]
+Key: VUNPCKHPSZrrkz:  [ 0.00  0.00 ]
+Key: VUNPCKHPSrm:  [ 0.00  0.00 ]
+Key: VUNPCKHPSrr:  [ 0.00  0.00 ]
+Key: VUNPCKLPDYrm:  [ 0.00  0.00 ]
+Key: VUNPCKLPDYrr:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZ:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrm:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrmb:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrmbk:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrmbkz:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrmk:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrmkz:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrr:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrrk:  [ 0.00  0.00 ]
+Key: VUNPCKLPDZrrkz:  [ 0.00  0.00 ]
+Key: VUNPCKLPDrm:  [ 0.00  0.00 ]
+Key: VUNPCKLPDrr:  [ 0.00  0.00 ]
+Key: VUNPCKLPSYrm:  [ 0.00  0.00 ]
+Key: VUNPCKLPSYrr:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZ:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrm:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrmb:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrmbk:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrmbkz:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrmk:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrmkz:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrr:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrrk:  [ 0.00  0.00 ]
+Key: VUNPCKLPSZrrkz:  [ 0.00  0.00 ]
+Key: VUNPCKLPSrm:  [ 0.00  0.00 ]
+Key: VUNPCKLPSrr:  [ 0.00  0.00 ]
+Key: VXORPDYrm:  [ 0.00  0.00 ]
+Key: VXORPDYrr:  [ 0.00  0.00 ]
+Key: VXORPDZ:  [ 0.00  0.00 ]
+Key: VXORPDZrm:  [ 0.00  0.00 ]
+Key: VXORPDZrmb:  [ 0.00  0.00 ]
+Key: VXORPDZrmbk:  [ 0.00  0.00 ]
+Key: VXORPDZrmbkz:  [ 0.00  0.00 ]
+Key: VXORPDZrmk:  [ 0.00  0.00 ]
+Key: VXORPDZrmkz:  [ 0.00  0.00 ]
+Key: VXORPDZrr:  [ 0.00  0.00 ]
+Key: VXORPDZrrk:  [ 0.00  0.00 ]
+Key: VXORPDZrrkz:  [ 0.00  0.00 ]
+Key: VXORPDrm:  [ 0.00  0.00 ]
+Key: VXORPDrr:  [ 0.00  0.00 ]
+Key: VXORPSYrm:  [ 0.00  0.00 ]
+Key: VXORPSYrr:  [ 0.00  0.00 ]
+Key: VXORPSZ:  [ 0.00  0.00 ]
+Key: VXORPSZrm:  [ 0.00  0.00 ]
+Key: VXORPSZrmb:  [ 0.00  0.00 ]
+Key: VXORPSZrmbk:  [ 0.00  0.00 ]
+Key: VXORPSZrmbkz:  [ 0.00  0.00 ]
+Key: VXORPSZrmk:  [ 0.00  0.00 ]
+Key: VXORPSZrmkz:  [ 0.00  0.00 ]
+Key: VXORPSZrr:  [ 0.00  0.00 ]
+Key: VXORPSZrrk:  [ 0.00  0.00 ]
+Key: VXORPSZrrkz:  [ 0.00  0.00 ]
+Key: VXORPSrm:  [ 0.00  0.00 ]
+Key: VXORPSrr:  [ 0.00  0.00 ]
+Key: VZEROALL:  [ 0.00  0.00 ]
+Key: VZEROUPPER:  [ 0.00  0.00 ]
+Key: V_SET:  [ 0.00  0.00 ]
+Key: V_SETALLONES:  [ 0.00  0.00 ]
+Key: WAIT:  [ 0.00  0.00 ]
+Key: WBINVD:  [ 0.00  0.00 ]
+Key: WBNOINVD:  [ 0.00  0.00 ]
+Key: WRFLAGS:  [ 0.00  0.00 ]
+Key: WRFSBASE:  [ 0.00  0.00 ]
+Key: WRGSBASE:  [ 0.00  0.00 ]
+Key: WRMSR:  [ 0.00  0.00 ]
+Key: WRMSRLIST:  [ 0.00  0.00 ]
+Key: WRMSRNS:  [ 0.00  0.00 ]
+Key: WRMSRNSir:  [ 0.00  0.00 ]
+Key: WRMSRNSir_EVEX:  [ 0.00  0.00 ]
+Key: WRPKRUr:  [ 0.00  0.00 ]
+Key: WRSSD:  [ 0.00  0.00 ]
+Key: WRSSD_EVEX:  [ 0.00  0.00 ]
+Key: WRSSQ:  [ 0.00  0.00 ]
+Key: WRSSQ_EVEX:  [ 0.00  0.00 ]
+Key: WRUSSD:  [ 0.00  0.00 ]
+Key: WRUSSD_EVEX:  [ 0.00  0.00 ]
+Key: WRUSSQ:  [ 0.00  0.00 ]
+Key: WRUSSQ_EVEX:  [ 0.00  0.00 ]
+Key: XABORT:  [ 0.00  0.00 ]
+Key: XABORT_DEF:  [ 0.00  0.00 ]
+Key: XACQUIRE_PREFIX:  [ 0.00  0.00 ]
+Key: XADD:  [ 0.00  0.00 ]
+Key: XAM_F:  [ 0.00  0.00 ]
+Key: XAM_Fp:  [ 0.00  0.00 ]
+Key: XBEGIN:  [ 0.00  0.00 ]
+Key: XCHG:  [ 0.00  0.00 ]
+Key: XCH_F:  [ 0.00  0.00 ]
+Key: XCRYPTCBC:  [ 0.00  0.00 ]
+Key: XCRYPTCFB:  [ 0.00  0.00 ]
+Key: XCRYPTCTR:  [ 0.00  0.00 ]
+Key: XCRYPTECB:  [ 0.00  0.00 ]
+Key: XCRYPTOFB:  [ 0.00  0.00 ]
+Key: XEND:  [ 0.00  0.00 ]
+Key: XGETBV:  [ 0.00  0.00 ]
+Key: XLAT:  [ 0.00  0.00 ]
+Key: XOR:  [ 0.00  0.00 ]
+Key: XORPDrm:  [ 0.00  0.00 ]
+Key: XORPDrr:  [ 0.00  0.00 ]
+Key: XORPSrm:  [ 0.00  0.00 ]
+Key: XORPSrr:  [ 0.00  0.00 ]
+Key: XRELEASE_PREFIX:  [ 0.00  0.00 ]
+Key: XRESLDTRK:  [ 0.00  0.00 ]
+Key: XRSTOR:  [ 0.00  0.00 ]
+Key: XRSTORS:  [ 0.00  0.00 ]
+Key: XSAVE:  [ 0.00  0.00 ]
+Key: XSAVEC:  [ 0.00  0.00 ]
+Key: XSAVEOPT:  [ 0.00  0.00 ]
+Key: XSAVES:  [ 0.00  0.00 ]
+Key: XSETBV:  [ 0.00  0.00 ]
+Key: XSHA:  [ 0.00  0.00 ]
+Key: XSTORE:  [ 0.00  0.00 ]
+Key: XSUSLDTRK:  [ 0.00  0.00 ]
+Key: XTEST:  [ 0.00  0.00 ]
diff --git a/llvm/test/CodeGen/MIR2Vec/vocab-basic.ll b/llvm/test/CodeGen/MIR2Vec/vocab-basic.ll
new file mode 100644
index 0000000..a57dd0b
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/vocab-basic.ll
@@ -0,0 +1,14 @@
+; REQUIRES: x86_64-linux
+; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_2D_vocab.json %s 2> %t1.log 
+; RUN: diff %S/Inputs/reference_x86_vocab_print.txt %t1.log
+
+; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-opc-weight=1 -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_2D_vocab.json %s 2> %t1.log 
+; RUN: diff %S/Inputs/reference_x86_vocab_print.txt %t1.log
+
+; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-opc-weight=0.5 -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_2D_vocab.json %s 2> %t1.log 
+; RUN: diff %S/Inputs/reference_x86_vocab_wo=0.5_print.txt %t1.log
+
+define dso_local void @test() {
+  entry:
+    ret void
+}
diff --git a/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll b/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll
new file mode 100644
index 0000000..1da516a
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll
@@ -0,0 +1,15 @@
+; REQUIRES: x86_64-linux
+; RUN: not llc -o /dev/null -print-mir2vec-vocab %s 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID
+; RUN: not llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_zero_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-ZERO-DIM
+; RUN: not llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_invalid_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ENTITIES
+; RUN: not llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_inconsistent_dims.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-INCONSISTENT-DIMS
+
+define dso_local void @test() {
+  entry:
+    ret void
+}
+
+; CHECK-INVALID: error: MIR2Vec vocabulary file path not specified; set it using --mir2vec-vocab-path
+; CHECK-ZERO-DIM: error: Dimension of 'entities' section of the vocabulary is zero
+; CHECK-NO-ENTITIES: error: Missing 'entities' section in vocabulary file
+; CHECK-INCONSISTENT-DIMS: error: All vectors in the 'entities' section of the vocabulary are not of the same dimension
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll
index 4ad2d2c..4914357 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll
@@ -23,6 +23,16 @@
 ; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+ztso -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s
 
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-TSO %s
+
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-TSO %s
+
 
 define float @atomic_load_f32_unordered(ptr %a) nounwind {
 ; RV32I-LABEL: atomic_load_f32_unordered:
@@ -171,6 +181,30 @@ define float @atomic_load_f32_acquire(ptr %a) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fmv.w.x fa0, a0
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_f32_acquire:
+; RV32IA-ZALASR-WMO:       # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT:    lw.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT:    fmv.w.x fa0, a0
+; RV32IA-ZALASR-WMO-NEXT:    ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_f32_acquire:
+; RV32IA-ZALASR-TSO:       # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT:    lw a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT:    fmv.w.x fa0, a0
+; RV32IA-ZALASR-TSO-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_f32_acquire:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    lw.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    fmv.w.x fa0, a0
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_f32_acquire:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    lw a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    fmv.w.x fa0, a0
+; RV64IA-ZALASR-TSO-NEXT:    ret
   %1 = load atomic float, ptr %a acquire, align 4
   ret float %1
 }
@@ -256,6 +290,18 @@ define float @atomic_load_f32_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fmv.w.x fa0, a0
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_f32_seq_cst:
+; RV32IA-ZALASR:       # %bb.0:
+; RV32IA-ZALASR-NEXT:    lw.aq a0, (a0)
+; RV32IA-ZALASR-NEXT:    fmv.w.x fa0, a0
+; RV32IA-ZALASR-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_f32_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    lw.aq a0, (a0)
+; RV64IA-ZALASR-NEXT:    fmv.w.x fa0, a0
+; RV64IA-ZALASR-NEXT:    ret
   %1 = load atomic float, ptr %a seq_cst, align 4
   ret float %1
 }
@@ -414,6 +460,18 @@ define double @atomic_load_f64_acquire(ptr %a) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ld a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fmv.d.x fa0, a0
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_f64_acquire:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    ld.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    fmv.d.x fa0, a0
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_f64_acquire:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    ld a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    fmv.d.x fa0, a0
+; RV64IA-ZALASR-TSO-NEXT:    ret
   %1 = load atomic double, ptr %a acquire, align 8
   ret double %1
 }
@@ -484,6 +542,12 @@ define double @atomic_load_f64_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ld a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fmv.d.x fa0, a0
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_f64_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    ld.aq a0, (a0)
+; RV64IA-ZALASR-NEXT:    fmv.d.x fa0, a0
+; RV64IA-ZALASR-NEXT:    ret
   %1 = load atomic double, ptr %a seq_cst, align 8
   ret double %1
 }
@@ -635,6 +699,30 @@ define void @atomic_store_f32_release(ptr %a, float %b) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fmv.x.w a1, fa0
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    sw a1, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_store_f32_release:
+; RV32IA-ZALASR-WMO:       # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT:    fmv.x.w a1, fa0
+; RV32IA-ZALASR-WMO-NEXT:    sw.rl a1, (a0)
+; RV32IA-ZALASR-WMO-NEXT:    ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_store_f32_release:
+; RV32IA-ZALASR-TSO:       # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT:    fmv.x.w a1, fa0
+; RV32IA-ZALASR-TSO-NEXT:    sw a1, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_store_f32_release:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    fmv.x.w a1, fa0
+; RV64IA-ZALASR-WMO-NEXT:    sw.rl a1, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_store_f32_release:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    fmv.x.w a1, fa0
+; RV64IA-ZALASR-TSO-NEXT:    sw a1, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    ret
   store atomic float %b, ptr %a release, align 4
   ret void
 }
@@ -718,6 +806,18 @@ define void @atomic_store_f32_seq_cst(ptr %a, float %b) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    sw a1, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-LABEL: atomic_store_f32_seq_cst:
+; RV32IA-ZALASR:       # %bb.0:
+; RV32IA-ZALASR-NEXT:    fmv.x.w a1, fa0
+; RV32IA-ZALASR-NEXT:    sw.rl a1, (a0)
+; RV32IA-ZALASR-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_store_f32_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    fmv.x.w a1, fa0
+; RV64IA-ZALASR-NEXT:    sw.rl a1, (a0)
+; RV64IA-ZALASR-NEXT:    ret
   store atomic float %b, ptr %a seq_cst, align 4
   ret void
 }
@@ -876,6 +976,18 @@ define void @atomic_store_f64_release(ptr %a, double %b) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fmv.x.d a1, fa0
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    sd a1, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_store_f64_release:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    fmv.x.d a1, fa0
+; RV64IA-ZALASR-WMO-NEXT:    sd.rl a1, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_store_f64_release:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    fmv.x.d a1, fa0
+; RV64IA-ZALASR-TSO-NEXT:    sd a1, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    ret
   store atomic double %b, ptr %a release, align 8
   ret void
 }
@@ -945,6 +1057,12 @@ define void @atomic_store_f64_seq_cst(ptr %a, double %b) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    sd a1, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_store_f64_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    fmv.x.d a1, fa0
+; RV64IA-ZALASR-NEXT:    sd.rl a1, (a0)
+; RV64IA-ZALASR-NEXT:    ret
   store atomic double %b, ptr %a seq_cst, align 8
   ret void
 }
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store.ll
index 1d5d918..5d3fed4 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store.ll
@@ -23,6 +23,15 @@
 ; RUN: llc -mtriple=riscv64 -global-isel -mattr=+a,+ztso -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s
 
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-TSO %s
+
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-TSO %s
 
 define i8 @atomic_load_i8_unordered(ptr %a) nounwind {
 ; RV32I-LABEL: atomic_load_i8_unordered:
@@ -156,6 +165,26 @@ define i8 @atomic_load_i8_acquire(ptr %a) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
+; RV32IA-ZALASR-WMO:       # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT:    lb.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT:    ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
+; RV32IA-ZALASR-TSO:       # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT:    lbu a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i8_acquire:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    lb.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i8_acquire:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    lbu a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    ret
   %1 = load atomic i8, ptr %a acquire, align 1
   ret i8 %1
 }
@@ -232,6 +261,16 @@ define i8 @atomic_load_i8_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    lbu a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i8_seq_cst:
+; RV32IA-ZALASR:       # %bb.0:
+; RV32IA-ZALASR-NEXT:    lb.aq a0, (a0)
+; RV32IA-ZALASR-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i8_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    lb.aq a0, (a0)
+; RV64IA-ZALASR-NEXT:    ret
   %1 = load atomic i8, ptr %a seq_cst, align 1
   ret i8 %1
 }
@@ -368,6 +407,26 @@ define i16 @atomic_load_i16_acquire(ptr %a) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
+; RV32IA-ZALASR-WMO:       # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT:    lh.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT:    ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
+; RV32IA-ZALASR-TSO:       # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT:    lh a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i16_acquire:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    lh.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i16_acquire:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    lh a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    ret
   %1 = load atomic i16, ptr %a acquire, align 2
   ret i16 %1
 }
@@ -444,6 +503,16 @@ define i16 @atomic_load_i16_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    lh a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i16_seq_cst:
+; RV32IA-ZALASR:       # %bb.0:
+; RV32IA-ZALASR-NEXT:    lh.aq a0, (a0)
+; RV32IA-ZALASR-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i16_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    lh.aq a0, (a0)
+; RV64IA-ZALASR-NEXT:    ret
   %1 = load atomic i16, ptr %a seq_cst, align 2
   ret i16 %1
 }
@@ -580,6 +649,26 @@ define i32 @atomic_load_i32_acquire(ptr %a) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire:
+; RV32IA-ZALASR-WMO:       # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT:    lw.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT:    ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire:
+; RV32IA-ZALASR-TSO:       # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT:    lw a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i32_acquire:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    lw.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i32_acquire:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    lw a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    ret
   %1 = load atomic i32, ptr %a acquire, align 4
   ret i32 %1
 }
@@ -656,6 +745,16 @@ define i32 @atomic_load_i32_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    lw a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_i32_seq_cst:
+; RV32IA-ZALASR:       # %bb.0:
+; RV32IA-ZALASR-NEXT:    lw.aq a0, (a0)
+; RV32IA-ZALASR-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i32_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    lw.aq a0, (a0)
+; RV64IA-ZALASR-NEXT:    ret
   %1 = load atomic i32, ptr %a seq_cst, align 4
   ret i32 %1
 }
@@ -790,6 +889,16 @@ define i64 @atomic_load_i64_acquire(ptr %a) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ld a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_i64_acquire:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    ld.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_i64_acquire:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    ld a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    ret
   %1 = load atomic i64, ptr %a acquire, align 8
   ret i64 %1
 }
@@ -850,6 +959,11 @@ define i64 @atomic_load_i64_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ld a0, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_i64_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    ld.aq a0, (a0)
+; RV64IA-ZALASR-NEXT:    ret
   %1 = load atomic i64, ptr %a seq_cst, align 8
   ret i64 %1
 }
@@ -986,6 +1100,26 @@ define void @atomic_store_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    sb a1, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_store_i8_release:
+; RV32IA-ZALASR-WMO:       # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT:    sb.rl a1, (a0)
+; RV32IA-ZALASR-WMO-NEXT:    ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_store_i8_release:
+; RV32IA-ZALASR-TSO:       # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT:    sb a1, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_store_i8_release:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    sb.rl a1, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_store_i8_release:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    sb a1, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    ret
   store atomic i8 %b, ptr %a release, align 1
   ret void
 }
@@ -1060,6 +1194,16 @@ define void @atomic_store_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    sb a1, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-LABEL: atomic_store_i8_seq_cst:
+; RV32IA-ZALASR:       # %bb.0:
+; RV32IA-ZALASR-NEXT:    sb.rl a1, (a0)
+; RV32IA-ZALASR-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_store_i8_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    sb.rl a1, (a0)
+; RV64IA-ZALASR-NEXT:    ret
   store atomic i8 %b, ptr %a seq_cst, align 1
   ret void
 }
@@ -1196,6 +1340,26 @@ define void @atomic_store_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    sh a1, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_store_i16_release:
+; RV32IA-ZALASR-WMO:       # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT:    sh.rl a1, (a0)
+; RV32IA-ZALASR-WMO-NEXT:    ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_store_i16_release:
+; RV32IA-ZALASR-TSO:       # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT:    sh a1, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_store_i16_release:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    sh.rl a1, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_store_i16_release:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    sh a1, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    ret
   store atomic i16 %b, ptr %a release, align 2
   ret void
 }
@@ -1270,6 +1434,16 @@ define void @atomic_store_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    sh a1, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-LABEL: atomic_store_i16_seq_cst:
+; RV32IA-ZALASR:       # %bb.0:
+; RV32IA-ZALASR-NEXT:    sh.rl a1, (a0)
+; RV32IA-ZALASR-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_store_i16_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    sh.rl a1, (a0)
+; RV64IA-ZALASR-NEXT:    ret
   store atomic i16 %b, ptr %a seq_cst, align 2
   ret void
 }
@@ -1406,6 +1580,26 @@ define void @atomic_store_i32_release(ptr %a, i32 %b) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    sw a1, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_store_i32_release:
+; RV32IA-ZALASR-WMO:       # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT:    sw.rl a1, (a0)
+; RV32IA-ZALASR-WMO-NEXT:    ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_store_i32_release:
+; RV32IA-ZALASR-TSO:       # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT:    sw a1, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_store_i32_release:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    sw.rl a1, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_store_i32_release:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    sw a1, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    ret
   store atomic i32 %b, ptr %a release, align 4
   ret void
 }
@@ -1480,6 +1674,16 @@ define void @atomic_store_i32_seq_cst(ptr %a, i32 %b) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    sw a1, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV32IA-ZALASR-LABEL: atomic_store_i32_seq_cst:
+; RV32IA-ZALASR:       # %bb.0:
+; RV32IA-ZALASR-NEXT:    sw.rl a1, (a0)
+; RV32IA-ZALASR-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_store_i32_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    sw.rl a1, (a0)
+; RV64IA-ZALASR-NEXT:    ret
   store atomic i32 %b, ptr %a seq_cst, align 4
   ret void
 }
@@ -1614,6 +1818,16 @@ define void @atomic_store_i64_release(ptr %a, i64 %b) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE:       # %bb.0:
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    sd a1, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_store_i64_release:
+; RV64IA-ZALASR-WMO:       # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT:    sd.rl a1, (a0)
+; RV64IA-ZALASR-WMO-NEXT:    ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_store_i64_release:
+; RV64IA-ZALASR-TSO:       # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT:    sd a1, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT:    ret
   store atomic i64 %b, ptr %a release, align 8
   ret void
 }
@@ -1673,6 +1887,11 @@ define void @atomic_store_i64_seq_cst(ptr %a, i64 %b) nounwind {
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    sd a1, 0(a0)
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    fence rw, rw
 ; RV64IA-TSO-TRAILING-FENCE-NEXT:    ret
+;
+; RV64IA-ZALASR-LABEL: atomic_store_i64_seq_cst:
+; RV64IA-ZALASR:       # %bb.0:
+; RV64IA-ZALASR-NEXT:    sd.rl a1, (a0)
+; RV64IA-ZALASR-NEXT:    ret
   store atomic i64 %b, ptr %a seq_cst, align 8
   ret void
 }
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir
index 74249c1..e2d3bff 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir
@@ -17,7 +17,7 @@ body:             |
     ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
     ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
     ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
-    ; RV32IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s8))
+    ; RV32IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s8))
     ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_B]]
     ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
     %0:gpr(p0) = COPY $x10
@@ -42,7 +42,7 @@ body:             |
     ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
     ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
     ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
-    ; RV32IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s16))
+    ; RV32IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s16))
     ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_H]]
     ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
     %0:gpr(p0) = COPY $x10
@@ -67,7 +67,7 @@ body:             |
     ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
     ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
     ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
-    ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32))
+    ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32))
     ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]]
     ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
     %0:gpr(p0) = COPY $x10
@@ -92,7 +92,7 @@ body:             |
     ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
     ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
     ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
-    ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32))
+    ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32))
     ; RV32IA-ZABHA-NEXT: [[SLTIU:%[0-9]+]]:gpr = SLTIU [[AMOCAS_W]], 1
     ; RV32IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
     ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]]
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir
index a2f7e30..ab537ea 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir
@@ -17,7 +17,7 @@ body:             |
     ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
     ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
     ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
-    ; RV64IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s8))
+    ; RV64IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s8))
     ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_B]]
     ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
     %0:gpr(p0) = COPY $x10
@@ -42,7 +42,7 @@ body:             |
     ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
     ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
     ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
-    ; RV64IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s16))
+    ; RV64IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s16))
     ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_H]]
     ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
     %0:gpr(p0) = COPY $x10
@@ -67,7 +67,7 @@ body:             |
     ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
     ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
     ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
-    ; RV64IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32))
+    ; RV64IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32))
     ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]]
     ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
     %0:gpr(p0) = COPY $x10
@@ -92,7 +92,7 @@ body:             |
     ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
     ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
     ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
-    ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s64))
+    ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s64))
     ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_D_RV64_]]
     ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
     %0:gpr(p0) = COPY $x10
@@ -116,7 +116,7 @@ body:             |
     ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
     ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
     ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
-    ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s64))
+    ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s64))
     ; RV64IA-ZABHA-NEXT: [[SLTIU:%[0-9]+]]:gpr = SLTIU [[AMOCAS_D_RV64_]], 1
     ; RV64IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
     ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_D_RV64_]]
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir
index f7fdc33..e547972 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir
@@ -15,7 +15,7 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
-    ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[COPY1]] :: (load store monotonic (s8))
+    ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY1]], [[COPY]] :: (load store monotonic (s8))
     ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
     ; CHECK-NEXT: PseudoRET implicit $x10
     %0:gprb(p0) = COPY $x10
@@ -38,7 +38,7 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
-    ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[COPY1]] :: (load store monotonic (s16))
+    ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY1]], [[COPY]] :: (load store monotonic (s16))
     ; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
     ; CHECK-NEXT: PseudoRET implicit $x10
     %0:gprb(p0) = COPY $x10
@@ -61,7 +61,7 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
-    ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[COPY1]] :: (load store monotonic (s32))
+    ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY1]], [[COPY]] :: (load store monotonic (s32))
     ; CHECK-NEXT: $x10 = COPY [[AMOADD_W]]
     ; CHECK-NEXT: PseudoRET implicit $x10
     %0:gprb(p0) = COPY $x10
@@ -86,7 +86,7 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
     ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
-    ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8))
+    ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8))
     ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
     ; CHECK-NEXT: PseudoRET implicit $x10
     %0:gprb(p0) = COPY $x10
@@ -113,7 +113,7 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
     ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
-    ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[SUB]] :: (load store monotonic (s16))
+    ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[SUB]], [[COPY]] :: (load store monotonic (s16))
     ; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
     ; CHECK-NEXT: PseudoRET implicit $x10
     %0:gprb(p0) = COPY $x10
@@ -140,7 +140,7 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
     ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
-    ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8))
+    ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8))
     ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
     ; CHECK-NEXT: PseudoRET implicit $x10
     %0:gprb(p0) = COPY $x10
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir
index 178586c..f34826c 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir
@@ -15,7 +15,7 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
-    ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[COPY1]] :: (load store monotonic (s8))
+    ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY1]], [[COPY]] :: (load store monotonic (s8))
     ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
     ; CHECK-NEXT: PseudoRET implicit $x10
     %0:gprb(p0) = COPY $x10
@@ -38,7 +38,7 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
-    ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[COPY1]] :: (load store monotonic (s16))
+    ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY1]], [[COPY]] :: (load store monotonic (s16))
     ; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
     ; CHECK-NEXT: PseudoRET implicit $x10
     %0:gprb(p0) = COPY $x10
@@ -61,7 +61,7 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
-    ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[COPY1]] :: (load store monotonic (s32))
+    ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY1]], [[COPY]] :: (load store monotonic (s32))
     ; CHECK-NEXT: $x10 = COPY [[AMOADD_W]]
     ; CHECK-NEXT: PseudoRET implicit $x10
     %0:gprb(p0) = COPY $x10
@@ -84,7 +84,7 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
-    ; CHECK-NEXT: [[AMOADD_D:%[0-9]+]]:gpr = AMOADD_D [[COPY]], [[COPY1]] :: (load store monotonic (s64))
+    ; CHECK-NEXT: [[AMOADD_D:%[0-9]+]]:gpr = AMOADD_D [[COPY1]], [[COPY]] :: (load store monotonic (s64))
     ; CHECK-NEXT: $x10 = COPY [[AMOADD_D]]
     ; CHECK-NEXT: PseudoRET implicit $x10
     %0:gprb(p0) = COPY $x10
@@ -109,7 +109,7 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
     ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
-    ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8))
+    ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8))
     ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
     ; CHECK-NEXT: PseudoRET implicit $x10
     %0:gprb(p0) = COPY $x10
@@ -136,7 +136,7 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
     ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
-    ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[SUB]] :: (load store monotonic (s16))
+    ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[SUB]], [[COPY]] :: (load store monotonic (s16))
     ; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
     ; CHECK-NEXT: PseudoRET implicit $x10
     %0:gprb(p0) = COPY $x10
@@ -163,7 +163,7 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
     ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
-    ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[SUB]] :: (load store monotonic (s32))
+    ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[SUB]], [[COPY]] :: (load store monotonic (s32))
     ; CHECK-NEXT: $x10 = COPY [[AMOADD_W]]
     ; CHECK-NEXT: PseudoRET implicit $x10
     %0:gprb(p0) = COPY $x10
@@ -190,7 +190,7 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
     ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
-    ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8))
+    ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8))
     ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
     ; CHECK-NEXT: PseudoRET implicit $x10
     %0:gprb(p0) = COPY $x10
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/fallback-rv32.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/fallback-rv32.ll
new file mode 100644
index 0000000..85a5d9a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/fallback-rv32.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -global-isel -global-isel-abort=2 \
+; RUN:     -pass-remarks-missed='gisel*' -mattr=+zve64d,+f,+d,+zvfh,+zvfbfmin \
+; RUN:     %s -o %t.out 2> %t.err
+; RUN: FileCheck %s --check-prefix=FALLBACK-WITH-REPORT-OUT < %t.out
+; RUN: FileCheck %s --check-prefix=FALLBACK-WITH-REPORT-ERR < %t.err
+
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate instruction: call
+; FALLBACK-WITH-REPORT-OUT-LABEL: test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i32 %vl) {
+entry:
+  %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) poison, ptr %base, i32 %vl, i32 3)
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0
+}
+
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to lower arguments
+; FALLBACK-WITH-REPORT-OUT-LABEL: test_vsseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t
+define void @test_vsseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %val, ptr %base, i32 %vl) {
+entry:
+  tail call void @llvm.riscv.vsseg2.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %val, ptr %base, i32 %vl, i32 3)
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/fallback-rv64.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/fallback-rv64.ll
new file mode 100644
index 0000000..b5405d3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/fallback-rv64.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -global-isel -global-isel-abort=2 \
+; RUN:     -pass-remarks-missed='gisel*' -mattr=+zve64d,+f,+d,+zvfh,+zvfbfmin \
+; RUN:     %s -o %t.out 2> %t.err
+; RUN: FileCheck %s --check-prefix=FALLBACK-WITH-REPORT-OUT < %t.out
+; RUN: FileCheck %s --check-prefix=FALLBACK-WITH-REPORT-ERR < %t.err
+
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate instruction: call
+; FALLBACK-WITH-REPORT-OUT-LABEL: test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl) {
+entry:
+  %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) poison, ptr %base, i64 %vl, i64 3)
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0
+}
+
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to lower arguments
+; FALLBACK-WITH-REPORT-OUT-LABEL: test_vsseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t
+define void @test_vsseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %val, ptr %base, i64 %vl) {
+entry:
+  tail call void @llvm.riscv.vsseg2.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %val, ptr %base, i64 %vl, i64 3)
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
index b0510f8..1213256 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
@@ -21,10 +21,19 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zacas -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-TSO,RV64IA-TSO-ZACAS %s
 
+; RUN: llc -mtriple=riscv32 -mattr=+a,+zabha -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-WMO,RV32IA-WMO-ZABHA,RV32IA-WMO-ZABHA-NOZACAS %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+zabha -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-TSO,RV32IA-TSO-ZABHA,RV32IA-TSO-ZABHA-NOZACAS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-WMO,RV64IA-WMO-ZABHA,RV64IA-WMO-ZABHA-NOZACAS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zabha -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-TSO,RV64IA-TSO-ZABHA,RV64IA-TSO-ZABHA-NOZACAS %s
+
+; RUN: llc -mtriple=riscv32 -mattr=+a,+zabha,+zacas -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-WMO,RV32IA-WMO-ZABHA,RV32IA-WMO-ZABHA-ZACAS %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+zabha,+zacas -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32IA,RV32IA-TSO,RV32IA-TSO-ZABHA,RV32IA-TSO-ZABHA-ZACAS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha,+zacas -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefixes=RV64IA,RV64IA-WMO,RV64IA-WMO-ZABHA,RV64IA-WMO-ZABHA-ZACAS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zabha,+zacas -verify-machineinstrs < %s \
@@ -41,25 +50,25 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_xchg_i8_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    zext.b a1, a1
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a2)
-; RV32IA-NEXT:    mv a5, a1
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB0_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT:    mv a5, a1
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB0_1
+; RV32IA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_i8_monotonic:
 ; RV64I:       # %bb.0:
@@ -91,6 +100,26 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT:    mv a5, a1
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB0_1
+; RV32IA-ZACAS-NEXT:  # %bb.2:
+; RV32IA-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_xchg_i8_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -111,6 +140,16 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.b a0, a1, (a0)
@@ -135,45 +174,45 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i8_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT:    mv a5, a1
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB1_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB1_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i8_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    mv a5, a1
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB1_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB1_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -225,6 +264,46 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB1_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB1_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -265,6 +344,16 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.b.aq a0, a1, (a0)
@@ -289,45 +378,45 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i8_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w a4, (a2)
-; RV32IA-WMO-NEXT:    mv a5, a1
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB2_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB2_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i8_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    mv a5, a1
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB2_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB2_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_i8_release:
 ; RV64I:       # %bb.0:
@@ -379,6 +468,46 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB2_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB2_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -419,6 +548,16 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.b.rl a0, a1, (a0)
@@ -443,45 +582,45 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i8_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT:    mv a5, a1
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB3_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB3_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i8_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    mv a5, a1
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB3_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB3_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -533,6 +672,46 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB3_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB3_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -573,6 +752,16 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.b.aqrl a0, a1, (a0)
@@ -597,25 +786,25 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_xchg_i8_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    zext.b a1, a1
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a4, (a2)
-; RV32IA-NEXT:    mv a5, a1
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB4_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i8_seq_cst:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT:    mv a5, a1
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB4_1
+; RV32IA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_i8_seq_cst:
 ; RV64I:       # %bb.0:
@@ -647,6 +836,26 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_i8_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT:    mv a5, a1
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB4_1
+; RV32IA-ZACAS-NEXT:  # %bb.2:
+; RV32IA-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_xchg_i8_seq_cst:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -667,6 +876,16 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i8_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.b.aqrl a0, a1, (a0)
@@ -695,16 +914,16 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_xchg_0_i8_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a1, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a2, 255
-; RV32IA-NEXT:    sll a2, a2, a0
-; RV32IA-NEXT:    not a2, a2
-; RV32IA-NEXT:    amoand.w a1, a2, (a1)
-; RV32IA-NEXT:    srl a0, a1, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a2, 255
+; RV32IA-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-NOZACAS-NEXT:    not a2, a2
+; RV32IA-NOZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_0_i8_monotonic:
 ; RV64I:       # %bb.0:
@@ -728,6 +947,17 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a2, 255
+; RV32IA-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-ZACAS-NEXT:    not a2, a2
+; RV32IA-ZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a1, a0, -4
@@ -739,6 +969,16 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.b a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.b a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.b a0, zero, (a0)
@@ -764,27 +1004,27 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i8_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a1, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a2, 255
-; RV32IA-WMO-NEXT:    sll a2, a2, a0
-; RV32IA-WMO-NEXT:    not a2, a2
-; RV32IA-WMO-NEXT:    amoand.w.aq a1, a2, (a1)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT:    not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT:    amoand.w.aq a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i8_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a1, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a2, 255
-; RV32IA-TSO-NEXT:    sll a2, a2, a0
-; RV32IA-TSO-NEXT:    not a2, a2
-; RV32IA-TSO-NEXT:    amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT:    not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_0_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -819,6 +1059,28 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a2, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    not a2, a2
+; RV32IA-WMO-ZACAS-NEXT:    amoand.w.aq a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a2, 255
+; RV32IA-TSO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    not a2, a2
+; RV32IA-TSO-ZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
@@ -841,6 +1103,16 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.b.aq a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.b a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.b.aq a0, zero, (a0)
@@ -866,27 +1138,27 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i8_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a1, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a2, 255
-; RV32IA-WMO-NEXT:    sll a2, a2, a0
-; RV32IA-WMO-NEXT:    not a2, a2
-; RV32IA-WMO-NEXT:    amoand.w.rl a1, a2, (a1)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT:    not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT:    amoand.w.rl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i8_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a1, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a2, 255
-; RV32IA-TSO-NEXT:    sll a2, a2, a0
-; RV32IA-TSO-NEXT:    not a2, a2
-; RV32IA-TSO-NEXT:    amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT:    not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_0_i8_release:
 ; RV64I:       # %bb.0:
@@ -921,6 +1193,28 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a2, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    not a2, a2
+; RV32IA-WMO-ZACAS-NEXT:    amoand.w.rl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a2, 255
+; RV32IA-TSO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    not a2, a2
+; RV32IA-TSO-ZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
@@ -943,6 +1237,16 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.b.rl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.b a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.b.rl a0, zero, (a0)
@@ -968,27 +1272,27 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i8_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a1, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a2, 255
-; RV32IA-WMO-NEXT:    sll a2, a2, a0
-; RV32IA-WMO-NEXT:    not a2, a2
-; RV32IA-WMO-NEXT:    amoand.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT:    not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT:    amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i8_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a1, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a2, 255
-; RV32IA-TSO-NEXT:    sll a2, a2, a0
-; RV32IA-TSO-NEXT:    not a2, a2
-; RV32IA-TSO-NEXT:    amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT:    not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_0_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -1023,6 +1327,28 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a2, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    not a2, a2
+; RV32IA-WMO-ZACAS-NEXT:    amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a2, 255
+; RV32IA-TSO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    not a2, a2
+; RV32IA-TSO-ZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
@@ -1045,6 +1371,16 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.b.aqrl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.b a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.b.aqrl a0, zero, (a0)
@@ -1070,27 +1406,27 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i8_seq_cst:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a1, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a2, 255
-; RV32IA-WMO-NEXT:    sll a2, a2, a0
-; RV32IA-WMO-NEXT:    not a2, a2
-; RV32IA-WMO-NEXT:    amoand.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT:    not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT:    amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i8_seq_cst:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a1, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a2, 255
-; RV32IA-TSO-NEXT:    sll a2, a2, a0
-; RV32IA-TSO-NEXT:    not a2, a2
-; RV32IA-TSO-NEXT:    amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT:    not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_0_i8_seq_cst:
 ; RV64I:       # %bb.0:
@@ -1125,6 +1461,28 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a2, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    not a2, a2
+; RV32IA-WMO-ZACAS-NEXT:    amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a2, 255
+; RV32IA-TSO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    not a2, a2
+; RV32IA-TSO-ZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
@@ -1147,6 +1505,16 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.b.aqrl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.b a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.b.aqrl a0, zero, (a0)
@@ -1172,15 +1540,15 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a1, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a2, 255
-; RV32IA-NEXT:    sll a2, a2, a0
-; RV32IA-NEXT:    amoor.w a1, a2, (a1)
-; RV32IA-NEXT:    srl a0, a1, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a2, 255
+; RV32IA-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-NOZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
 ; RV64I:       # %bb.0:
@@ -1203,6 +1571,16 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a2, 255
+; RV32IA-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-ZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a1, a0, -4
@@ -1213,6 +1591,18 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    li a1, -1
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    li a1, -1
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    li a1, -1
@@ -1240,25 +1630,25 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a1, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a2, 255
-; RV32IA-WMO-NEXT:    sll a2, a2, a0
-; RV32IA-WMO-NEXT:    amoor.w.aq a1, a2, (a1)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoor.w.aq a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a1, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a2, 255
-; RV32IA-TSO-NEXT:    sll a2, a2, a0
-; RV32IA-TSO-NEXT:    amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -1291,6 +1681,26 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a2, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoor.w.aq a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a2, 255
+; RV32IA-TSO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
@@ -1311,6 +1721,18 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    li a1, -1
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    li a1, -1
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    li a1, -1
@@ -1338,25 +1760,25 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i8_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a1, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a2, 255
-; RV32IA-WMO-NEXT:    sll a2, a2, a0
-; RV32IA-WMO-NEXT:    amoor.w.rl a1, a2, (a1)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoor.w.rl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i8_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a1, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a2, 255
-; RV32IA-TSO-NEXT:    sll a2, a2, a0
-; RV32IA-TSO-NEXT:    amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_release:
 ; RV64I:       # %bb.0:
@@ -1389,6 +1811,26 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a2, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoor.w.rl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a2, 255
+; RV32IA-TSO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
@@ -1409,6 +1851,18 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    li a1, -1
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    li a1, -1
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    li a1, -1
@@ -1436,25 +1890,25 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a1, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a2, 255
-; RV32IA-WMO-NEXT:    sll a2, a2, a0
-; RV32IA-WMO-NEXT:    amoor.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a1, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a2, 255
-; RV32IA-TSO-NEXT:    sll a2, a2, a0
-; RV32IA-TSO-NEXT:    amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -1487,6 +1941,26 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a2, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a2, 255
+; RV32IA-TSO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
@@ -1507,6 +1981,18 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    li a1, -1
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    li a1, -1
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    li a1, -1
@@ -1534,25 +2020,25 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a1, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a2, 255
-; RV32IA-WMO-NEXT:    sll a2, a2, a0
-; RV32IA-WMO-NEXT:    amoor.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a1, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a2, 255
-; RV32IA-TSO-NEXT:    sll a2, a2, a0
-; RV32IA-TSO-NEXT:    amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
 ; RV64I:       # %bb.0:
@@ -1585,6 +2071,26 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a2, 255
+; RV32IA-WMO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a2, 255
+; RV32IA-TSO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
@@ -1605,6 +2111,18 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    li a1, -1
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    li a1, -1
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    li a1, -1
@@ -1631,25 +2149,25 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_add_i8_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    zext.b a1, a1
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB15_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a2)
-; RV32IA-NEXT:    add a5, a4, a1
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB15_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_add_i8_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT:    add a5, a4, a1
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB15_1
+; RV32IA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_add_i8_monotonic:
 ; RV64I:       # %bb.0:
@@ -1681,6 +2199,26 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_add_i8_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT:    add a5, a4, a1
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB15_1
+; RV32IA-ZACAS-NEXT:  # %bb.2:
+; RV32IA-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_add_i8_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -1701,6 +2239,16 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i8_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoadd.b a0, a1, (a0)
@@ -1725,45 +2273,45 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_add_i8_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT:    add a5, a4, a1
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB16_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    add a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB16_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_add_i8_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    add a5, a4, a1
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB16_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    add a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB16_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_add_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -1815,6 +2363,46 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    add a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB16_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    add a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB16_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -1855,6 +2443,16 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoadd.b.aq a0, a1, (a0)
@@ -1879,45 +2477,45 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_add_i8_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w a4, (a2)
-; RV32IA-WMO-NEXT:    add a5, a4, a1
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB17_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    add a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB17_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_add_i8_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    add a5, a4, a1
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB17_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i8_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    add a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB17_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_add_i8_release:
 ; RV64I:       # %bb.0:
@@ -1969,6 +2567,46 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    add a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB17_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i8_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    add a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB17_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -2009,6 +2647,16 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i8_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoadd.b.rl a0, a1, (a0)
@@ -2033,45 +2681,45 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_add_i8_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB18_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT:    add a5, a4, a1
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB18_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    add a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB18_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_add_i8_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB18_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    add a5, a4, a1
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB18_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    add a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB18_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_add_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -2123,6 +2771,46 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    add a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB18_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    add a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB18_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -2163,6 +2851,16 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoadd.b.aqrl a0, a1, (a0)
@@ -2187,25 +2885,25 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_add_i8_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    zext.b a1, a1
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB19_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a4, (a2)
-; RV32IA-NEXT:    add a5, a4, a1
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB19_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_add_i8_seq_cst:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB19_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT:    add a5, a4, a1
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB19_1
+; RV32IA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_add_i8_seq_cst:
 ; RV64I:       # %bb.0:
@@ -2237,6 +2935,26 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_add_i8_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB19_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT:    add a5, a4, a1
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB19_1
+; RV32IA-ZACAS-NEXT:  # %bb.2:
+; RV32IA-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_add_i8_seq_cst:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -2257,6 +2975,16 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i8_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoadd.b.aqrl a0, a1, (a0)
@@ -2281,25 +3009,25 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_sub_i8_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    zext.b a1, a1
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a2)
-; RV32IA-NEXT:    sub a5, a4, a1
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB20_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i8_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT:    sub a5, a4, a1
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB20_1
+; RV32IA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_sub_i8_monotonic:
 ; RV64I:       # %bb.0:
@@ -2331,6 +3059,26 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_sub_i8_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT:    sub a5, a4, a1
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB20_1
+; RV32IA-ZACAS-NEXT:  # %bb.2:
+; RV32IA-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_sub_i8_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -2351,6 +3099,18 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i8_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    neg a1, a1
@@ -2377,45 +3137,45 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i8_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT:    sub a5, a4, a1
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB21_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    sub a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB21_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i8_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    sub a5, a4, a1
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB21_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    sub a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB21_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_sub_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -2467,6 +3227,46 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    sub a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB21_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    sub a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB21_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -2507,6 +3307,18 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    neg a1, a1
@@ -2533,45 +3345,45 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i8_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w a4, (a2)
-; RV32IA-WMO-NEXT:    sub a5, a4, a1
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB22_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    sub a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB22_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i8_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    sub a5, a4, a1
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB22_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    sub a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB22_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_sub_i8_release:
 ; RV64I:       # %bb.0:
@@ -2623,6 +3435,46 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    sub a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB22_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    sub a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB22_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -2663,6 +3515,18 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    neg a1, a1
@@ -2689,45 +3553,45 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i8_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT:    sub a5, a4, a1
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB23_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    sub a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB23_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i8_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    sub a5, a4, a1
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB23_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    sub a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB23_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_sub_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -2779,6 +3643,46 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    sub a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB23_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    sub a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB23_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -2819,6 +3723,18 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    neg a1, a1
@@ -2845,25 +3761,25 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_sub_i8_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    zext.b a1, a1
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a4, (a2)
-; RV32IA-NEXT:    sub a5, a4, a1
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB24_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT:    sub a5, a4, a1
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB24_1
+; RV32IA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_sub_i8_seq_cst:
 ; RV64I:       # %bb.0:
@@ -2895,6 +3811,26 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT:    sub a5, a4, a1
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB24_1
+; RV32IA-ZACAS-NEXT:  # %bb.2:
+; RV32IA-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_sub_i8_seq_cst:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -2915,6 +3851,18 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    neg a1, a1
@@ -2941,19 +3889,19 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_and_i8_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    zext.b a1, a1
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    not a3, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    or a1, a1, a3
-; RV32IA-NEXT:    amoand.w a1, a1, (a2)
-; RV32IA-NEXT:    srl a0, a1, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_and_i8_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    not a3, a3
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-NOZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_and_i8_monotonic:
 ; RV64I:       # %bb.0:
@@ -2979,6 +3927,20 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_and_i8_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    not a3, a3
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-ZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_and_i8_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -2993,6 +3955,16 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoand.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i8_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoand.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoand.b a0, a1, (a0)
@@ -3017,33 +3989,33 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_and_i8_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    not a3, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    or a1, a1, a3
-; RV32IA-WMO-NEXT:    amoand.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    amoand.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_and_i8_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    not a3, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    or a1, a1, a3
-; RV32IA-TSO-NEXT:    amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_and_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -3083,6 +4055,34 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    amoand.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -3111,6 +4111,16 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoand.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoand.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoand.b.aq a0, a1, (a0)
@@ -3135,33 +4145,33 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_and_i8_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    not a3, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    or a1, a1, a3
-; RV32IA-WMO-NEXT:    amoand.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    amoand.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_and_i8_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    not a3, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    or a1, a1, a3
-; RV32IA-TSO-NEXT:    amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_and_i8_release:
 ; RV64I:       # %bb.0:
@@ -3201,6 +4211,34 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    amoand.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -3229,6 +4267,16 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoand.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i8_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoand.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoand.b.rl a0, a1, (a0)
@@ -3253,33 +4301,33 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_and_i8_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    not a3, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    or a1, a1, a3
-; RV32IA-WMO-NEXT:    amoand.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_and_i8_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    not a3, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    or a1, a1, a3
-; RV32IA-TSO-NEXT:    amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_and_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -3319,6 +4367,34 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -3347,6 +4423,16 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoand.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoand.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoand.b.aqrl a0, a1, (a0)
@@ -3371,33 +4457,33 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_and_i8_seq_cst:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    not a3, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    or a1, a1, a3
-; RV32IA-WMO-NEXT:    amoand.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_and_i8_seq_cst:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    not a3, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    or a1, a1, a3
-; RV32IA-TSO-NEXT:    amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_and_i8_seq_cst:
 ; RV64I:       # %bb.0:
@@ -3437,6 +4523,34 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_seq_cst:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -3465,6 +4579,16 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoand.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoand.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoand.b.aqrl a0, a1, (a0)
@@ -3489,26 +4613,26 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_nand_i8_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    zext.b a1, a1
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB30_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a2)
-; RV32IA-NEXT:    and a5, a4, a1
-; RV32IA-NEXT:    not a5, a5
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB30_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB30_1
+; RV32IA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i8_monotonic:
 ; RV64I:       # %bb.0:
@@ -3541,6 +4665,27 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT:    and a5, a4, a1
+; RV32IA-ZACAS-NEXT:    not a5, a5
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB30_1
+; RV32IA-ZACAS-NEXT:  # %bb.2:
+; RV32IA-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_nand_i8_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -3562,6 +4707,48 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-WMO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB30_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-TSO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB30_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic:
 ; RV64IA-WMO-ZABHA-NOZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
@@ -3604,6 +4791,36 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-WMO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    lbu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  .LBB30_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    slli a4, a0, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    amocas.b a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    srai a4, a4, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB30_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-TSO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    lbu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  .LBB30_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    slli a4, a0, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    amocas.b a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    srai a4, a4, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB30_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_monotonic:
 ; RV64IA-WMO-ZABHA-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
@@ -3648,47 +4865,47 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i8_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT:    and a5, a4, a1
-; RV32IA-WMO-NEXT:    not a5, a5
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB31_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB31_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i8_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    and a5, a4, a1
-; RV32IA-TSO-NEXT:    not a5, a5
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB31_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB31_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -3742,6 +4959,48 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB31_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB31_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -3784,6 +5043,48 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-WMO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB31_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-TSO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB31_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
 ; RV64IA-WMO-ZABHA-NOZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
@@ -3826,6 +5127,36 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-WMO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    lbu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  .LBB31_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    slli a4, a0, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    amocas.b.aq a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    srai a4, a4, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB31_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-TSO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    lbu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  .LBB31_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    slli a4, a0, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    amocas.b a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    srai a4, a4, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB31_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
 ; RV64IA-WMO-ZABHA-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
@@ -3870,47 +5201,47 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i8_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w a4, (a2)
-; RV32IA-WMO-NEXT:    and a5, a4, a1
-; RV32IA-WMO-NEXT:    not a5, a5
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB32_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB32_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i8_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    and a5, a4, a1
-; RV32IA-TSO-NEXT:    not a5, a5
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB32_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB32_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i8_release:
 ; RV64I:       # %bb.0:
@@ -3964,6 +5295,48 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB32_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB32_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -4006,6 +5379,48 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-WMO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB32_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-TSO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB32_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_release:
 ; RV64IA-WMO-ZABHA-NOZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
@@ -4048,6 +5463,36 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-WMO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    lbu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  .LBB32_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    slli a4, a0, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    amocas.b.rl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    srai a4, a4, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB32_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-TSO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    lbu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  .LBB32_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    slli a4, a0, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    amocas.b a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    srai a4, a4, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB32_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_release:
 ; RV64IA-WMO-ZABHA-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
@@ -4092,47 +5537,47 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i8_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT:    and a5, a4, a1
-; RV32IA-WMO-NEXT:    not a5, a5
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB33_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB33_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i8_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    and a5, a4, a1
-; RV32IA-TSO-NEXT:    not a5, a5
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB33_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB33_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -4186,6 +5631,48 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB33_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB33_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -4228,6 +5715,48 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-WMO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB33_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-TSO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB33_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
 ; RV64IA-WMO-ZABHA-NOZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
@@ -4270,6 +5799,36 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-WMO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    lbu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  .LBB33_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    slli a4, a0, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    amocas.b.aqrl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    srai a4, a4, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB33_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-TSO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    lbu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  .LBB33_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    slli a4, a0, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    amocas.b a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    srai a4, a4, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB33_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
 ; RV64IA-WMO-ZABHA-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
@@ -4314,26 +5873,26 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_nand_i8_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    zext.b a1, a1
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB34_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a4, (a2)
-; RV32IA-NEXT:    and a5, a4, a1
-; RV32IA-NEXT:    not a5, a5
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB34_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB34_1
+; RV32IA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i8_seq_cst:
 ; RV64I:       # %bb.0:
@@ -4366,6 +5925,27 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT:    and a5, a4, a1
+; RV32IA-ZACAS-NEXT:    not a5, a5
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB34_1
+; RV32IA-ZACAS-NEXT:  # %bb.2:
+; RV32IA-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -4387,6 +5967,48 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-WMO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lr.w.aqrl a4, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB34_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-TSO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lr.w.aqrl a4, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a4, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB34_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
 ; RV64IA-WMO-ZABHA-NOZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
@@ -4429,6 +6051,38 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-WMO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    lbu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  .LBB34_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    fence rw, rw
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    slli a4, a0, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    amocas.b.aqrl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    srai a4, a4, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB34_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-TSO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    lbu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  .LBB34_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    fence rw, rw
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    slli a4, a0, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    amocas.b a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    srai a4, a4, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB34_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
 ; RV64IA-WMO-ZABHA-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
@@ -4475,15 +6129,15 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_or_i8_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    zext.b a1, a1
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    amoor.w a1, a1, (a2)
-; RV32IA-NEXT:    srl a0, a1, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_or_i8_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_or_i8_monotonic:
 ; RV64I:       # %bb.0:
@@ -4505,6 +6159,16 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_or_i8_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_or_i8_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -4515,6 +6179,16 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoor.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i8_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoor.b a0, a1, (a0)
@@ -4539,25 +6213,25 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_or_i8_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    amoor.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoor.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_or_i8_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_or_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -4589,6 +6263,26 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoor.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -4609,6 +6303,16 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoor.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoor.b.aq a0, a1, (a0)
@@ -4633,25 +6337,25 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_or_i8_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    amoor.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoor.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_or_i8_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_or_i8_release:
 ; RV64I:       # %bb.0:
@@ -4683,6 +6387,26 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoor.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -4703,6 +6427,16 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoor.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i8_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoor.b.rl a0, a1, (a0)
@@ -4727,25 +6461,25 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_or_i8_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    amoor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_or_i8_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_or_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -4777,6 +6511,26 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -4797,6 +6551,16 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoor.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoor.b.aqrl a0, a1, (a0)
@@ -4821,25 +6585,25 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_or_i8_seq_cst:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    amoor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_or_i8_seq_cst:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_or_i8_seq_cst:
 ; RV64I:       # %bb.0:
@@ -4871,6 +6635,26 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_seq_cst:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -4891,6 +6675,16 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoor.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoor.b.aqrl a0, a1, (a0)
@@ -4915,15 +6709,15 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_xor_i8_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    zext.b a1, a1
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    amoxor.w a1, a1, (a2)
-; RV32IA-NEXT:    srl a0, a1, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xor_i8_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xor_i8_monotonic:
 ; RV64I:       # %bb.0:
@@ -4945,6 +6739,16 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_xor_i8_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_xor_i8_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -4955,6 +6759,16 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoxor.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i8_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoxor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoxor.b a0, a1, (a0)
@@ -4979,25 +6793,25 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i8_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    amoxor.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoxor.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i8_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xor_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -5029,6 +6843,26 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoxor.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -5049,6 +6883,16 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoxor.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoxor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoxor.b.aq a0, a1, (a0)
@@ -5073,25 +6917,25 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i8_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    amoxor.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoxor.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i8_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xor_i8_release:
 ; RV64I:       # %bb.0:
@@ -5123,6 +6967,26 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoxor.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -5143,6 +7007,16 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoxor.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoxor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoxor.b.rl a0, a1, (a0)
@@ -5167,25 +7041,25 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i8_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    amoxor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i8_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xor_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -5217,6 +7091,26 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -5237,6 +7131,16 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoxor.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoxor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoxor.b.aqrl a0, a1, (a0)
@@ -5261,25 +7165,25 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i8_seq_cst:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    amoxor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i8_seq_cst:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xor_i8_seq_cst:
 ; RV64I:       # %bb.0:
@@ -5311,6 +7215,26 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -5331,6 +7255,16 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoxor.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoxor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoxor.b.aqrl a0, a1, (a0)
@@ -5387,34 +7321,34 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_max_i8_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    slli a1, a1, 24
-; RV32IA-NEXT:    andi a4, a0, 24
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    srai a1, a1, 24
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    xori a4, a4, 24
-; RV32IA-NEXT:  .LBB45_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a2)
-; RV32IA-NEXT:    and a7, a5, a3
-; RV32IA-NEXT:    mv a6, a5
-; RV32IA-NEXT:    sll a7, a7, a4
-; RV32IA-NEXT:    sra a7, a7, a4
-; RV32IA-NEXT:    bge a7, a1, .LBB45_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB45_1 Depth=1
-; RV32IA-NEXT:    xor a6, a5, a1
-; RV32IA-NEXT:    and a6, a6, a3
-; RV32IA-NEXT:    xor a6, a5, a6
-; RV32IA-NEXT:  .LBB45_3: # in Loop: Header=BB45_1 Depth=1
-; RV32IA-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB45_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_max_i8_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-NOZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-NOZACAS-NEXT:  .LBB45_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT:    bge a7, a1, .LBB45_3
+; RV32IA-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB45_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT:  .LBB45_3: # in Loop: Header=BB45_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a6, .LBB45_1
+; RV32IA-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i8_monotonic:
 ; RV64I:       # %bb.0:
@@ -5487,6 +7421,35 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_max_i8_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-ZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-ZACAS-NEXT:  .LBB45_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a5
+; RV32IA-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-ZACAS-NEXT:    bge a7, a1, .LBB45_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB45_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-ZACAS-NEXT:  .LBB45_3: # in Loop: Header=BB45_1 Depth=1
+; RV32IA-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a6, .LBB45_1
+; RV32IA-ZACAS-NEXT:  # %bb.4:
+; RV32IA-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_max_i8_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -5516,6 +7479,16 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomax.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i8_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomax.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomax.b a0, a1, (a0)
@@ -5572,63 +7545,63 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_max_i8_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    slli a1, a1, 24
-; RV32IA-WMO-NEXT:    andi a4, a0, 24
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    srai a1, a1, 24
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    xori a4, a4, 24
-; RV32IA-WMO-NEXT:  .LBB46_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT:    and a7, a5, a3
-; RV32IA-WMO-NEXT:    mv a6, a5
-; RV32IA-WMO-NEXT:    sll a7, a7, a4
-; RV32IA-WMO-NEXT:    sra a7, a7, a4
-; RV32IA-WMO-NEXT:    bge a7, a1, .LBB46_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB46_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a6, a5, a1
-; RV32IA-WMO-NEXT:    and a6, a6, a3
-; RV32IA-WMO-NEXT:    xor a6, a5, a6
-; RV32IA-WMO-NEXT:  .LBB46_3: # in Loop: Header=BB46_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-WMO-NEXT:    bnez a6, .LBB46_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a5, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bge a7, a1, .LBB46_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a6, .LBB46_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_max_i8_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    slli a1, a1, 24
-; RV32IA-TSO-NEXT:    andi a4, a0, 24
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    srai a1, a1, 24
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    xori a4, a4, 24
-; RV32IA-TSO-NEXT:  .LBB46_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a5, (a2)
-; RV32IA-TSO-NEXT:    and a7, a5, a3
-; RV32IA-TSO-NEXT:    mv a6, a5
-; RV32IA-TSO-NEXT:    sll a7, a7, a4
-; RV32IA-TSO-NEXT:    sra a7, a7, a4
-; RV32IA-TSO-NEXT:    bge a7, a1, .LBB46_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB46_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a6, a5, a1
-; RV32IA-TSO-NEXT:    and a6, a6, a3
-; RV32IA-TSO-NEXT:    xor a6, a5, a6
-; RV32IA-TSO-NEXT:  .LBB46_3: # in Loop: Header=BB46_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT:    bnez a6, .LBB46_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a5, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bge a7, a1, .LBB46_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a6, .LBB46_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -5730,6 +7703,64 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT:  .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    bge a7, a1, .LBB46_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT:  .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a6, .LBB46_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT:  .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    bge a7, a1, .LBB46_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT:  .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a6, .LBB46_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -5788,6 +7819,16 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomax.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomax.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomax.b.aq a0, a1, (a0)
@@ -5844,63 +7885,63 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_max_i8_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    slli a1, a1, 24
-; RV32IA-WMO-NEXT:    andi a4, a0, 24
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    srai a1, a1, 24
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    xori a4, a4, 24
-; RV32IA-WMO-NEXT:  .LBB47_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w a5, (a2)
-; RV32IA-WMO-NEXT:    and a7, a5, a3
-; RV32IA-WMO-NEXT:    mv a6, a5
-; RV32IA-WMO-NEXT:    sll a7, a7, a4
-; RV32IA-WMO-NEXT:    sra a7, a7, a4
-; RV32IA-WMO-NEXT:    bge a7, a1, .LBB47_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB47_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a6, a5, a1
-; RV32IA-WMO-NEXT:    and a6, a6, a3
-; RV32IA-WMO-NEXT:    xor a6, a5, a6
-; RV32IA-WMO-NEXT:  .LBB47_3: # in Loop: Header=BB47_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT:    bnez a6, .LBB47_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a5, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bge a7, a1, .LBB47_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a6, .LBB47_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_max_i8_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    slli a1, a1, 24
-; RV32IA-TSO-NEXT:    andi a4, a0, 24
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    srai a1, a1, 24
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    xori a4, a4, 24
-; RV32IA-TSO-NEXT:  .LBB47_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a5, (a2)
-; RV32IA-TSO-NEXT:    and a7, a5, a3
-; RV32IA-TSO-NEXT:    mv a6, a5
-; RV32IA-TSO-NEXT:    sll a7, a7, a4
-; RV32IA-TSO-NEXT:    sra a7, a7, a4
-; RV32IA-TSO-NEXT:    bge a7, a1, .LBB47_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB47_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a6, a5, a1
-; RV32IA-TSO-NEXT:    and a6, a6, a3
-; RV32IA-TSO-NEXT:    xor a6, a5, a6
-; RV32IA-TSO-NEXT:  .LBB47_3: # in Loop: Header=BB47_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT:    bnez a6, .LBB47_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a5, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i8_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bge a7, a1, .LBB47_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a6, .LBB47_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i8_release:
 ; RV64I:       # %bb.0:
@@ -6002,6 +8043,64 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT:  .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    bge a7, a1, .LBB47_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT:  .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a6, .LBB47_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i8_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT:  .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    bge a7, a1, .LBB47_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT:  .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a6, .LBB47_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -6060,6 +8159,16 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomax.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i8_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomax.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomax.b.rl a0, a1, (a0)
@@ -6116,63 +8225,63 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_max_i8_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    slli a1, a1, 24
-; RV32IA-WMO-NEXT:    andi a4, a0, 24
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    srai a1, a1, 24
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    xori a4, a4, 24
-; RV32IA-WMO-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT:    and a7, a5, a3
-; RV32IA-WMO-NEXT:    mv a6, a5
-; RV32IA-WMO-NEXT:    sll a7, a7, a4
-; RV32IA-WMO-NEXT:    sra a7, a7, a4
-; RV32IA-WMO-NEXT:    bge a7, a1, .LBB48_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB48_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a6, a5, a1
-; RV32IA-WMO-NEXT:    and a6, a6, a3
-; RV32IA-WMO-NEXT:    xor a6, a5, a6
-; RV32IA-WMO-NEXT:  .LBB48_3: # in Loop: Header=BB48_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT:    bnez a6, .LBB48_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a5, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bge a7, a1, .LBB48_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a6, .LBB48_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_max_i8_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    slli a1, a1, 24
-; RV32IA-TSO-NEXT:    andi a4, a0, 24
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    srai a1, a1, 24
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    xori a4, a4, 24
-; RV32IA-TSO-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a5, (a2)
-; RV32IA-TSO-NEXT:    and a7, a5, a3
-; RV32IA-TSO-NEXT:    mv a6, a5
-; RV32IA-TSO-NEXT:    sll a7, a7, a4
-; RV32IA-TSO-NEXT:    sra a7, a7, a4
-; RV32IA-TSO-NEXT:    bge a7, a1, .LBB48_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB48_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a6, a5, a1
-; RV32IA-TSO-NEXT:    and a6, a6, a3
-; RV32IA-TSO-NEXT:    xor a6, a5, a6
-; RV32IA-TSO-NEXT:  .LBB48_3: # in Loop: Header=BB48_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT:    bnez a6, .LBB48_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a5, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bge a7, a1, .LBB48_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a6, .LBB48_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -6274,6 +8383,64 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    bge a7, a1, .LBB48_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT:  .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a6, .LBB48_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    bge a7, a1, .LBB48_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT:  .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a6, .LBB48_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -6332,6 +8499,16 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomax.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomax.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomax.b.aqrl a0, a1, (a0)
@@ -6388,34 +8565,34 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_max_i8_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    slli a1, a1, 24
-; RV32IA-NEXT:    andi a4, a0, 24
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    srai a1, a1, 24
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    xori a4, a4, 24
-; RV32IA-NEXT:  .LBB49_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a5, (a2)
-; RV32IA-NEXT:    and a7, a5, a3
-; RV32IA-NEXT:    mv a6, a5
-; RV32IA-NEXT:    sll a7, a7, a4
-; RV32IA-NEXT:    sra a7, a7, a4
-; RV32IA-NEXT:    bge a7, a1, .LBB49_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB49_1 Depth=1
-; RV32IA-NEXT:    xor a6, a5, a1
-; RV32IA-NEXT:    and a6, a6, a3
-; RV32IA-NEXT:    xor a6, a5, a6
-; RV32IA-NEXT:  .LBB49_3: # in Loop: Header=BB49_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB49_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_max_i8_seq_cst:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-NOZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-NOZACAS-NEXT:  .LBB49_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w.aqrl a5, (a2)
+; RV32IA-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT:    bge a7, a1, .LBB49_3
+; RV32IA-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB49_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT:  .LBB49_3: # in Loop: Header=BB49_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a6, .LBB49_1
+; RV32IA-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i8_seq_cst:
 ; RV64I:       # %bb.0:
@@ -6488,6 +8665,35 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_max_i8_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-ZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-ZACAS-NEXT:  .LBB49_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a5, (a2)
+; RV32IA-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a5
+; RV32IA-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-ZACAS-NEXT:    bge a7, a1, .LBB49_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB49_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-ZACAS-NEXT:  .LBB49_3: # in Loop: Header=BB49_1 Depth=1
+; RV32IA-ZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a6, .LBB49_1
+; RV32IA-ZACAS-NEXT:  # %bb.4:
+; RV32IA-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_max_i8_seq_cst:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -6517,6 +8723,16 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomax.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i8_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomax.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomax.b.aqrl a0, a1, (a0)
@@ -6573,34 +8789,34 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_min_i8_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    slli a1, a1, 24
-; RV32IA-NEXT:    andi a4, a0, 24
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    srai a1, a1, 24
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    xori a4, a4, 24
-; RV32IA-NEXT:  .LBB50_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a2)
-; RV32IA-NEXT:    and a7, a5, a3
-; RV32IA-NEXT:    mv a6, a5
-; RV32IA-NEXT:    sll a7, a7, a4
-; RV32IA-NEXT:    sra a7, a7, a4
-; RV32IA-NEXT:    bge a1, a7, .LBB50_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB50_1 Depth=1
-; RV32IA-NEXT:    xor a6, a5, a1
-; RV32IA-NEXT:    and a6, a6, a3
-; RV32IA-NEXT:    xor a6, a5, a6
-; RV32IA-NEXT:  .LBB50_3: # in Loop: Header=BB50_1 Depth=1
-; RV32IA-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB50_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_min_i8_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-NOZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-NOZACAS-NEXT:  .LBB50_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT:    bge a1, a7, .LBB50_3
+; RV32IA-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB50_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT:  .LBB50_3: # in Loop: Header=BB50_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a6, .LBB50_1
+; RV32IA-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i8_monotonic:
 ; RV64I:       # %bb.0:
@@ -6673,6 +8889,35 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_min_i8_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-ZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-ZACAS-NEXT:  .LBB50_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a5
+; RV32IA-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-ZACAS-NEXT:    bge a1, a7, .LBB50_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB50_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-ZACAS-NEXT:  .LBB50_3: # in Loop: Header=BB50_1 Depth=1
+; RV32IA-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a6, .LBB50_1
+; RV32IA-ZACAS-NEXT:  # %bb.4:
+; RV32IA-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_min_i8_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -6702,6 +8947,16 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomin.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i8_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomin.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomin.b a0, a1, (a0)
@@ -6758,63 +9013,63 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_min_i8_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    slli a1, a1, 24
-; RV32IA-WMO-NEXT:    andi a4, a0, 24
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    srai a1, a1, 24
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    xori a4, a4, 24
-; RV32IA-WMO-NEXT:  .LBB51_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT:    and a7, a5, a3
-; RV32IA-WMO-NEXT:    mv a6, a5
-; RV32IA-WMO-NEXT:    sll a7, a7, a4
-; RV32IA-WMO-NEXT:    sra a7, a7, a4
-; RV32IA-WMO-NEXT:    bge a1, a7, .LBB51_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB51_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a6, a5, a1
-; RV32IA-WMO-NEXT:    and a6, a6, a3
-; RV32IA-WMO-NEXT:    xor a6, a5, a6
-; RV32IA-WMO-NEXT:  .LBB51_3: # in Loop: Header=BB51_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-WMO-NEXT:    bnez a6, .LBB51_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a5, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bge a1, a7, .LBB51_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a6, .LBB51_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_min_i8_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    slli a1, a1, 24
-; RV32IA-TSO-NEXT:    andi a4, a0, 24
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    srai a1, a1, 24
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    xori a4, a4, 24
-; RV32IA-TSO-NEXT:  .LBB51_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a5, (a2)
-; RV32IA-TSO-NEXT:    and a7, a5, a3
-; RV32IA-TSO-NEXT:    mv a6, a5
-; RV32IA-TSO-NEXT:    sll a7, a7, a4
-; RV32IA-TSO-NEXT:    sra a7, a7, a4
-; RV32IA-TSO-NEXT:    bge a1, a7, .LBB51_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB51_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a6, a5, a1
-; RV32IA-TSO-NEXT:    and a6, a6, a3
-; RV32IA-TSO-NEXT:    xor a6, a5, a6
-; RV32IA-TSO-NEXT:  .LBB51_3: # in Loop: Header=BB51_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT:    bnez a6, .LBB51_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a5, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bge a1, a7, .LBB51_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a6, .LBB51_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -6916,6 +9171,64 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT:  .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    bge a1, a7, .LBB51_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT:  .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a6, .LBB51_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT:  .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    bge a1, a7, .LBB51_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT:  .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a6, .LBB51_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -6974,6 +9287,16 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomin.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomin.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomin.b.aq a0, a1, (a0)
@@ -7030,63 +9353,63 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_min_i8_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    slli a1, a1, 24
-; RV32IA-WMO-NEXT:    andi a4, a0, 24
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    srai a1, a1, 24
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    xori a4, a4, 24
-; RV32IA-WMO-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w a5, (a2)
-; RV32IA-WMO-NEXT:    and a7, a5, a3
-; RV32IA-WMO-NEXT:    mv a6, a5
-; RV32IA-WMO-NEXT:    sll a7, a7, a4
-; RV32IA-WMO-NEXT:    sra a7, a7, a4
-; RV32IA-WMO-NEXT:    bge a1, a7, .LBB52_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB52_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a6, a5, a1
-; RV32IA-WMO-NEXT:    and a6, a6, a3
-; RV32IA-WMO-NEXT:    xor a6, a5, a6
-; RV32IA-WMO-NEXT:  .LBB52_3: # in Loop: Header=BB52_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT:    bnez a6, .LBB52_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a5, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bge a1, a7, .LBB52_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a6, .LBB52_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_min_i8_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    slli a1, a1, 24
-; RV32IA-TSO-NEXT:    andi a4, a0, 24
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    srai a1, a1, 24
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    xori a4, a4, 24
-; RV32IA-TSO-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a5, (a2)
-; RV32IA-TSO-NEXT:    and a7, a5, a3
-; RV32IA-TSO-NEXT:    mv a6, a5
-; RV32IA-TSO-NEXT:    sll a7, a7, a4
-; RV32IA-TSO-NEXT:    sra a7, a7, a4
-; RV32IA-TSO-NEXT:    bge a1, a7, .LBB52_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB52_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a6, a5, a1
-; RV32IA-TSO-NEXT:    and a6, a6, a3
-; RV32IA-TSO-NEXT:    xor a6, a5, a6
-; RV32IA-TSO-NEXT:  .LBB52_3: # in Loop: Header=BB52_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT:    bnez a6, .LBB52_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a5, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i8_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bge a1, a7, .LBB52_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a6, .LBB52_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i8_release:
 ; RV64I:       # %bb.0:
@@ -7188,6 +9511,64 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    bge a1, a7, .LBB52_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT:  .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a6, .LBB52_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i8_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    bge a1, a7, .LBB52_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT:  .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a6, .LBB52_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -7246,6 +9627,16 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomin.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i8_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomin.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomin.b.rl a0, a1, (a0)
@@ -7302,63 +9693,63 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_min_i8_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    slli a1, a1, 24
-; RV32IA-WMO-NEXT:    andi a4, a0, 24
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    srai a1, a1, 24
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    xori a4, a4, 24
-; RV32IA-WMO-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT:    and a7, a5, a3
-; RV32IA-WMO-NEXT:    mv a6, a5
-; RV32IA-WMO-NEXT:    sll a7, a7, a4
-; RV32IA-WMO-NEXT:    sra a7, a7, a4
-; RV32IA-WMO-NEXT:    bge a1, a7, .LBB53_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB53_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a6, a5, a1
-; RV32IA-WMO-NEXT:    and a6, a6, a3
-; RV32IA-WMO-NEXT:    xor a6, a5, a6
-; RV32IA-WMO-NEXT:  .LBB53_3: # in Loop: Header=BB53_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT:    bnez a6, .LBB53_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a5, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bge a1, a7, .LBB53_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a6, .LBB53_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_min_i8_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    slli a1, a1, 24
-; RV32IA-TSO-NEXT:    andi a4, a0, 24
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    srai a1, a1, 24
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    xori a4, a4, 24
-; RV32IA-TSO-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a5, (a2)
-; RV32IA-TSO-NEXT:    and a7, a5, a3
-; RV32IA-TSO-NEXT:    mv a6, a5
-; RV32IA-TSO-NEXT:    sll a7, a7, a4
-; RV32IA-TSO-NEXT:    sra a7, a7, a4
-; RV32IA-TSO-NEXT:    bge a1, a7, .LBB53_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB53_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a6, a5, a1
-; RV32IA-TSO-NEXT:    and a6, a6, a3
-; RV32IA-TSO-NEXT:    xor a6, a5, a6
-; RV32IA-TSO-NEXT:  .LBB53_3: # in Loop: Header=BB53_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT:    bnez a6, .LBB53_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a5, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bge a1, a7, .LBB53_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a6, .LBB53_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -7460,6 +9851,64 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    bge a1, a7, .LBB53_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT:  .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a6, .LBB53_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    bge a1, a7, .LBB53_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT:  .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a6, .LBB53_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -7518,6 +9967,16 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomin.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomin.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomin.b.aqrl a0, a1, (a0)
@@ -7574,34 +10033,34 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_min_i8_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    slli a1, a1, 24
-; RV32IA-NEXT:    andi a4, a0, 24
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    srai a1, a1, 24
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    xori a4, a4, 24
-; RV32IA-NEXT:  .LBB54_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a5, (a2)
-; RV32IA-NEXT:    and a7, a5, a3
-; RV32IA-NEXT:    mv a6, a5
-; RV32IA-NEXT:    sll a7, a7, a4
-; RV32IA-NEXT:    sra a7, a7, a4
-; RV32IA-NEXT:    bge a1, a7, .LBB54_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB54_1 Depth=1
-; RV32IA-NEXT:    xor a6, a5, a1
-; RV32IA-NEXT:    and a6, a6, a3
-; RV32IA-NEXT:    xor a6, a5, a6
-; RV32IA-NEXT:  .LBB54_3: # in Loop: Header=BB54_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB54_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_min_i8_seq_cst:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-NOZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-NOZACAS-NEXT:  .LBB54_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w.aqrl a5, (a2)
+; RV32IA-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT:    bge a1, a7, .LBB54_3
+; RV32IA-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB54_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT:  .LBB54_3: # in Loop: Header=BB54_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a6, .LBB54_1
+; RV32IA-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i8_seq_cst:
 ; RV64I:       # %bb.0:
@@ -7674,6 +10133,35 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_min_i8_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    slli a1, a1, 24
+; RV32IA-ZACAS-NEXT:    andi a4, a0, 24
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    srai a1, a1, 24
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    xori a4, a4, 24
+; RV32IA-ZACAS-NEXT:  .LBB54_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a5, (a2)
+; RV32IA-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a5
+; RV32IA-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-ZACAS-NEXT:    bge a1, a7, .LBB54_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB54_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-ZACAS-NEXT:  .LBB54_3: # in Loop: Header=BB54_1 Depth=1
+; RV32IA-ZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a6, .LBB54_1
+; RV32IA-ZACAS-NEXT:  # %bb.4:
+; RV32IA-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_min_i8_seq_cst:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -7703,6 +10191,16 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomin.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i8_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomin.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomin.b.aqrl a0, a1, (a0)
@@ -7757,29 +10255,29 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umax_i8_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    zext.b a1, a1
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB55_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a2)
-; RV32IA-NEXT:    and a6, a4, a3
-; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a6, a1, .LBB55_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB55_1 Depth=1
-; RV32IA-NEXT:    xor a5, a4, a1
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:  .LBB55_3: # in Loop: Header=BB55_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB55_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i8_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB55_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT:    and a6, a4, a3
+; RV32IA-NOZACAS-NEXT:    mv a5, a4
+; RV32IA-NOZACAS-NEXT:    bgeu a6, a1, .LBB55_3
+; RV32IA-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB55_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:  .LBB55_3: # in Loop: Header=BB55_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB55_1
+; RV32IA-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umax_i8_monotonic:
 ; RV64I:       # %bb.0:
@@ -7845,6 +10343,30 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_umax_i8_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB55_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT:    and a6, a4, a3
+; RV32IA-ZACAS-NEXT:    mv a5, a4
+; RV32IA-ZACAS-NEXT:    bgeu a6, a1, .LBB55_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB55_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:  .LBB55_3: # in Loop: Header=BB55_1 Depth=1
+; RV32IA-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB55_1
+; RV32IA-ZACAS-NEXT:  # %bb.4:
+; RV32IA-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_umax_i8_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -7869,6 +10391,16 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomaxu.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i8_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomaxu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomaxu.b a0, a1, (a0)
@@ -7923,53 +10455,53 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i8_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT:    and a6, a4, a3
-; RV32IA-WMO-NEXT:    mv a5, a4
-; RV32IA-WMO-NEXT:    bgeu a6, a1, .LBB56_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB56_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a5, a4, a1
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:  .LBB56_3: # in Loop: Header=BB56_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB56_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bgeu a6, a1, .LBB56_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB56_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i8_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    and a6, a4, a3
-; RV32IA-TSO-NEXT:    mv a5, a4
-; RV32IA-TSO-NEXT:    bgeu a6, a1, .LBB56_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB56_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a5, a4, a1
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:  .LBB56_3: # in Loop: Header=BB56_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB56_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bgeu a6, a1, .LBB56_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB56_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umax_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -8059,6 +10591,54 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    bgeu a6, a1, .LBB56_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB56_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    bgeu a6, a1, .LBB56_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB56_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -8107,6 +10687,16 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomaxu.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomaxu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomaxu.b.aq a0, a1, (a0)
@@ -8161,53 +10751,53 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i8_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w a4, (a2)
-; RV32IA-WMO-NEXT:    and a6, a4, a3
-; RV32IA-WMO-NEXT:    mv a5, a4
-; RV32IA-WMO-NEXT:    bgeu a6, a1, .LBB57_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB57_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a5, a4, a1
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:  .LBB57_3: # in Loop: Header=BB57_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB57_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bgeu a6, a1, .LBB57_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB57_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i8_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    and a6, a4, a3
-; RV32IA-TSO-NEXT:    mv a5, a4
-; RV32IA-TSO-NEXT:    bgeu a6, a1, .LBB57_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB57_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a5, a4, a1
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:  .LBB57_3: # in Loop: Header=BB57_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB57_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bgeu a6, a1, .LBB57_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB57_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umax_i8_release:
 ; RV64I:       # %bb.0:
@@ -8297,6 +10887,54 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    bgeu a6, a1, .LBB57_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB57_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    bgeu a6, a1, .LBB57_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB57_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -8345,6 +10983,16 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomaxu.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomaxu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomaxu.b.rl a0, a1, (a0)
@@ -8399,53 +11047,53 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i8_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB58_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT:    and a6, a4, a3
-; RV32IA-WMO-NEXT:    mv a5, a4
-; RV32IA-WMO-NEXT:    bgeu a6, a1, .LBB58_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB58_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a5, a4, a1
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:  .LBB58_3: # in Loop: Header=BB58_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB58_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bgeu a6, a1, .LBB58_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB58_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i8_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB58_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    and a6, a4, a3
-; RV32IA-TSO-NEXT:    mv a5, a4
-; RV32IA-TSO-NEXT:    bgeu a6, a1, .LBB58_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB58_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a5, a4, a1
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:  .LBB58_3: # in Loop: Header=BB58_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB58_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bgeu a6, a1, .LBB58_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB58_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umax_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -8535,6 +11183,54 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    bgeu a6, a1, .LBB58_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB58_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    bgeu a6, a1, .LBB58_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB58_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -8583,6 +11279,16 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomaxu.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomaxu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomaxu.b.aqrl a0, a1, (a0)
@@ -8637,29 +11343,29 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umax_i8_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    zext.b a1, a1
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB59_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a4, (a2)
-; RV32IA-NEXT:    and a6, a4, a3
-; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a6, a1, .LBB59_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB59_1 Depth=1
-; RV32IA-NEXT:    xor a5, a4, a1
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:  .LBB59_3: # in Loop: Header=BB59_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB59_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i8_seq_cst:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB59_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT:    and a6, a4, a3
+; RV32IA-NOZACAS-NEXT:    mv a5, a4
+; RV32IA-NOZACAS-NEXT:    bgeu a6, a1, .LBB59_3
+; RV32IA-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB59_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:  .LBB59_3: # in Loop: Header=BB59_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB59_1
+; RV32IA-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umax_i8_seq_cst:
 ; RV64I:       # %bb.0:
@@ -8725,6 +11431,30 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_umax_i8_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB59_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT:    and a6, a4, a3
+; RV32IA-ZACAS-NEXT:    mv a5, a4
+; RV32IA-ZACAS-NEXT:    bgeu a6, a1, .LBB59_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB59_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:  .LBB59_3: # in Loop: Header=BB59_1 Depth=1
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB59_1
+; RV32IA-ZACAS-NEXT:  # %bb.4:
+; RV32IA-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_umax_i8_seq_cst:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -8749,6 +11479,16 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomaxu.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i8_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomaxu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomaxu.b.aqrl a0, a1, (a0)
@@ -8803,29 +11543,29 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umin_i8_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    zext.b a1, a1
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB60_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a2)
-; RV32IA-NEXT:    and a6, a4, a3
-; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a1, a6, .LBB60_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB60_1 Depth=1
-; RV32IA-NEXT:    xor a5, a4, a1
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:  .LBB60_3: # in Loop: Header=BB60_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB60_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i8_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB60_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT:    and a6, a4, a3
+; RV32IA-NOZACAS-NEXT:    mv a5, a4
+; RV32IA-NOZACAS-NEXT:    bgeu a1, a6, .LBB60_3
+; RV32IA-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB60_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:  .LBB60_3: # in Loop: Header=BB60_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB60_1
+; RV32IA-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umin_i8_monotonic:
 ; RV64I:       # %bb.0:
@@ -8891,6 +11631,30 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_umin_i8_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB60_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT:    and a6, a4, a3
+; RV32IA-ZACAS-NEXT:    mv a5, a4
+; RV32IA-ZACAS-NEXT:    bgeu a1, a6, .LBB60_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB60_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:  .LBB60_3: # in Loop: Header=BB60_1 Depth=1
+; RV32IA-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB60_1
+; RV32IA-ZACAS-NEXT:  # %bb.4:
+; RV32IA-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_umin_i8_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -8915,6 +11679,16 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amominu.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i8_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amominu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amominu.b a0, a1, (a0)
@@ -8969,53 +11743,53 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i8_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT:    and a6, a4, a3
-; RV32IA-WMO-NEXT:    mv a5, a4
-; RV32IA-WMO-NEXT:    bgeu a1, a6, .LBB61_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB61_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a5, a4, a1
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:  .LBB61_3: # in Loop: Header=BB61_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB61_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bgeu a1, a6, .LBB61_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB61_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i8_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    and a6, a4, a3
-; RV32IA-TSO-NEXT:    mv a5, a4
-; RV32IA-TSO-NEXT:    bgeu a1, a6, .LBB61_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB61_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a5, a4, a1
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:  .LBB61_3: # in Loop: Header=BB61_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB61_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bgeu a1, a6, .LBB61_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB61_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umin_i8_acquire:
 ; RV64I:       # %bb.0:
@@ -9105,6 +11879,54 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    bgeu a1, a6, .LBB61_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB61_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    bgeu a1, a6, .LBB61_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB61_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -9153,6 +11975,16 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amominu.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amominu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amominu.b.aq a0, a1, (a0)
@@ -9207,53 +12039,53 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i8_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB62_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w a4, (a2)
-; RV32IA-WMO-NEXT:    and a6, a4, a3
-; RV32IA-WMO-NEXT:    mv a5, a4
-; RV32IA-WMO-NEXT:    bgeu a1, a6, .LBB62_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB62_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a5, a4, a1
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:  .LBB62_3: # in Loop: Header=BB62_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB62_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bgeu a1, a6, .LBB62_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB62_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i8_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB62_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    and a6, a4, a3
-; RV32IA-TSO-NEXT:    mv a5, a4
-; RV32IA-TSO-NEXT:    bgeu a1, a6, .LBB62_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB62_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a5, a4, a1
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:  .LBB62_3: # in Loop: Header=BB62_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB62_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bgeu a1, a6, .LBB62_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB62_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umin_i8_release:
 ; RV64I:       # %bb.0:
@@ -9343,6 +12175,54 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    bgeu a1, a6, .LBB62_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB62_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    bgeu a1, a6, .LBB62_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB62_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -9391,6 +12271,16 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amominu.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amominu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amominu.b.rl a0, a1, (a0)
@@ -9445,53 +12335,53 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i8_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    li a3, 255
-; RV32IA-WMO-NEXT:    zext.b a1, a1
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB63_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT:    and a6, a4, a3
-; RV32IA-WMO-NEXT:    mv a5, a4
-; RV32IA-WMO-NEXT:    bgeu a1, a6, .LBB63_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB63_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a5, a4, a1
-; RV32IA-WMO-NEXT:    and a5, a5, a3
-; RV32IA-WMO-NEXT:    xor a5, a4, a5
-; RV32IA-WMO-NEXT:  .LBB63_3: # in Loop: Header=BB63_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB63_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a4, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bgeu a1, a6, .LBB63_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB63_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i8_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    li a3, 255
-; RV32IA-TSO-NEXT:    zext.b a1, a1
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB63_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a4, (a2)
-; RV32IA-TSO-NEXT:    and a6, a4, a3
-; RV32IA-TSO-NEXT:    mv a5, a4
-; RV32IA-TSO-NEXT:    bgeu a1, a6, .LBB63_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB63_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a5, a4, a1
-; RV32IA-TSO-NEXT:    and a5, a5, a3
-; RV32IA-TSO-NEXT:    xor a5, a4, a5
-; RV32IA-TSO-NEXT:  .LBB63_3: # in Loop: Header=BB63_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB63_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a4, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bgeu a1, a6, .LBB63_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB63_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umin_i8_acq_rel:
 ; RV64I:       # %bb.0:
@@ -9581,6 +12471,54 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    li a3, 255
+; RV32IA-WMO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    bgeu a1, a6, .LBB63_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB63_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    li a3, 255
+; RV32IA-TSO-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    bgeu a1, a6, .LBB63_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB63_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -9629,6 +12567,16 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amominu.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amominu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amominu.b.aqrl a0, a1, (a0)
@@ -9683,29 +12631,29 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umin_i8_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    li a3, 255
-; RV32IA-NEXT:    zext.b a1, a1
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB64_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a4, (a2)
-; RV32IA-NEXT:    and a6, a4, a3
-; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a1, a6, .LBB64_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB64_1 Depth=1
-; RV32IA-NEXT:    xor a5, a4, a1
-; RV32IA-NEXT:    and a5, a5, a3
-; RV32IA-NEXT:    xor a5, a4, a5
-; RV32IA-NEXT:  .LBB64_3: # in Loop: Header=BB64_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB64_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a4, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i8_seq_cst:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    li a3, 255
+; RV32IA-NOZACAS-NEXT:    zext.b a1, a1
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB64_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT:    and a6, a4, a3
+; RV32IA-NOZACAS-NEXT:    mv a5, a4
+; RV32IA-NOZACAS-NEXT:    bgeu a1, a6, .LBB64_3
+; RV32IA-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB64_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a3
+; RV32IA-NOZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT:  .LBB64_3: # in Loop: Header=BB64_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB64_1
+; RV32IA-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-NOZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umin_i8_seq_cst:
 ; RV64I:       # %bb.0:
@@ -9771,6 +12719,30 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_umin_i8_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    li a3, 255
+; RV32IA-ZACAS-NEXT:    zext.b a1, a1
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB64_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT:    and a6, a4, a3
+; RV32IA-ZACAS-NEXT:    mv a5, a4
+; RV32IA-ZACAS-NEXT:    bgeu a1, a6, .LBB64_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB64_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a1
+; RV32IA-ZACAS-NEXT:    and a5, a5, a3
+; RV32IA-ZACAS-NEXT:    xor a5, a4, a5
+; RV32IA-ZACAS-NEXT:  .LBB64_3: # in Loop: Header=BB64_1 Depth=1
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB64_1
+; RV32IA-ZACAS-NEXT:  # %bb.4:
+; RV32IA-ZACAS-NEXT:    srl a0, a4, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_umin_i8_seq_cst:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -9795,6 +12767,16 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a4, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amominu.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i8_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amominu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amominu.b.aqrl a0, a1, (a0)
@@ -9819,26 +12801,26 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_xchg_i16_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB65_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a2)
-; RV32IA-NEXT:    mv a5, a1
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB65_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB65_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT:    mv a5, a1
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB65_1
+; RV32IA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_i16_monotonic:
 ; RV64I:       # %bb.0:
@@ -9871,6 +12853,27 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB65_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT:    mv a5, a1
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB65_1
+; RV32IA-ZACAS-NEXT:  # %bb.2:
+; RV32IA-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_xchg_i16_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -9892,6 +12895,16 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.h a0, a1, (a0)
@@ -9916,47 +12929,47 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i16_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB66_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT:    mv a5, a1
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB66_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB66_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i16_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB66_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    mv a5, a1
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB66_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB66_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -10010,6 +13023,48 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB66_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB66_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -10052,6 +13107,16 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.h.aq a0, a1, (a0)
@@ -10076,47 +13141,47 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i16_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w a3, (a2)
-; RV32IA-WMO-NEXT:    mv a5, a1
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB67_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB67_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i16_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    mv a5, a1
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB67_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB67_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_i16_release:
 ; RV64I:       # %bb.0:
@@ -10170,6 +13235,48 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB67_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB67_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -10212,6 +13319,16 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.h.rl a0, a1, (a0)
@@ -10236,47 +13353,47 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i16_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT:    mv a5, a1
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB68_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB68_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i16_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    mv a5, a1
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB68_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB68_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -10330,6 +13447,48 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB68_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB68_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -10372,6 +13531,16 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.h.aqrl a0, a1, (a0)
@@ -10396,26 +13565,26 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_xchg_i16_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB69_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a3, (a2)
-; RV32IA-NEXT:    mv a5, a1
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB69_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i16_seq_cst:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB69_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT:    mv a5, a1
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB69_1
+; RV32IA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_i16_seq_cst:
 ; RV64I:       # %bb.0:
@@ -10448,6 +13617,27 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_i16_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB69_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT:    mv a5, a1
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB69_1
+; RV32IA-ZACAS-NEXT:  # %bb.2:
+; RV32IA-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_xchg_i16_seq_cst:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -10469,6 +13659,16 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i16_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.h.aqrl a0, a1, (a0)
@@ -10497,17 +13697,17 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_xchg_0_i16_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a1, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a2, 16
-; RV32IA-NEXT:    addi a2, a2, -1
-; RV32IA-NEXT:    sll a2, a2, a0
-; RV32IA-NEXT:    not a2, a2
-; RV32IA-NEXT:    amoand.w a1, a2, (a1)
-; RV32IA-NEXT:    srl a0, a1, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-NOZACAS-NEXT:    not a2, a2
+; RV32IA-NOZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_0_i16_monotonic:
 ; RV64I:       # %bb.0:
@@ -10532,6 +13732,18 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a2, 16
+; RV32IA-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-ZACAS-NEXT:    not a2, a2
+; RV32IA-ZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a1, a0, -4
@@ -10544,6 +13756,16 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.h a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.h a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.h a0, zero, (a0)
@@ -10569,29 +13791,29 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i16_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a1, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a2, 16
-; RV32IA-WMO-NEXT:    addi a2, a2, -1
-; RV32IA-WMO-NEXT:    sll a2, a2, a0
-; RV32IA-WMO-NEXT:    not a2, a2
-; RV32IA-WMO-NEXT:    amoand.w.aq a1, a2, (a1)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT:    not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT:    amoand.w.aq a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i16_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a1, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a2, 16
-; RV32IA-TSO-NEXT:    addi a2, a2, -1
-; RV32IA-TSO-NEXT:    sll a2, a2, a0
-; RV32IA-TSO-NEXT:    not a2, a2
-; RV32IA-TSO-NEXT:    amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT:    not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_0_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -10628,6 +13850,30 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    not a2, a2
+; RV32IA-WMO-ZACAS-NEXT:    amoand.w.aq a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    not a2, a2
+; RV32IA-TSO-ZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
@@ -10652,6 +13898,16 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.h.aq a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.h a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.h.aq a0, zero, (a0)
@@ -10677,29 +13933,29 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i16_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a1, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a2, 16
-; RV32IA-WMO-NEXT:    addi a2, a2, -1
-; RV32IA-WMO-NEXT:    sll a2, a2, a0
-; RV32IA-WMO-NEXT:    not a2, a2
-; RV32IA-WMO-NEXT:    amoand.w.rl a1, a2, (a1)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT:    not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT:    amoand.w.rl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i16_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a1, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a2, 16
-; RV32IA-TSO-NEXT:    addi a2, a2, -1
-; RV32IA-TSO-NEXT:    sll a2, a2, a0
-; RV32IA-TSO-NEXT:    not a2, a2
-; RV32IA-TSO-NEXT:    amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT:    not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_0_i16_release:
 ; RV64I:       # %bb.0:
@@ -10736,6 +13992,30 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    not a2, a2
+; RV32IA-WMO-ZACAS-NEXT:    amoand.w.rl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    not a2, a2
+; RV32IA-TSO-ZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
@@ -10760,6 +14040,16 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.h.rl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.h a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.h.rl a0, zero, (a0)
@@ -10785,29 +14075,29 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i16_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a1, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a2, 16
-; RV32IA-WMO-NEXT:    addi a2, a2, -1
-; RV32IA-WMO-NEXT:    sll a2, a2, a0
-; RV32IA-WMO-NEXT:    not a2, a2
-; RV32IA-WMO-NEXT:    amoand.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT:    not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT:    amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i16_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a1, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a2, 16
-; RV32IA-TSO-NEXT:    addi a2, a2, -1
-; RV32IA-TSO-NEXT:    sll a2, a2, a0
-; RV32IA-TSO-NEXT:    not a2, a2
-; RV32IA-TSO-NEXT:    amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT:    not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_0_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -10844,6 +14134,30 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    not a2, a2
+; RV32IA-WMO-ZACAS-NEXT:    amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    not a2, a2
+; RV32IA-TSO-ZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
@@ -10868,6 +14182,16 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.h.aqrl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.h a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.h.aqrl a0, zero, (a0)
@@ -10893,29 +14217,29 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i16_seq_cst:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a1, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a2, 16
-; RV32IA-WMO-NEXT:    addi a2, a2, -1
-; RV32IA-WMO-NEXT:    sll a2, a2, a0
-; RV32IA-WMO-NEXT:    not a2, a2
-; RV32IA-WMO-NEXT:    amoand.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT:    not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT:    amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i16_seq_cst:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a1, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a2, 16
-; RV32IA-TSO-NEXT:    addi a2, a2, -1
-; RV32IA-TSO-NEXT:    sll a2, a2, a0
-; RV32IA-TSO-NEXT:    not a2, a2
-; RV32IA-TSO-NEXT:    amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT:    not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_0_i16_seq_cst:
 ; RV64I:       # %bb.0:
@@ -10952,6 +14276,30 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    not a2, a2
+; RV32IA-WMO-ZACAS-NEXT:    amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    not a2, a2
+; RV32IA-TSO-ZACAS-NEXT:    amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
@@ -10976,6 +14324,16 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.h.aqrl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.h a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoswap.h.aqrl a0, zero, (a0)
@@ -11002,16 +14360,16 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a1, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a2, 16
-; RV32IA-NEXT:    addi a2, a2, -1
-; RV32IA-NEXT:    sll a2, a2, a0
-; RV32IA-NEXT:    amoor.w a1, a2, (a1)
-; RV32IA-NEXT:    srl a0, a1, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-NOZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
 ; RV64I:       # %bb.0:
@@ -11036,6 +14394,17 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a2, 16
+; RV32IA-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-ZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a1, a0, -4
@@ -11047,6 +14416,18 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    li a1, -1
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    li a1, -1
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    li a1, -1
@@ -11075,27 +14456,27 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a1, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a2, 16
-; RV32IA-WMO-NEXT:    addi a2, a2, -1
-; RV32IA-WMO-NEXT:    sll a2, a2, a0
-; RV32IA-WMO-NEXT:    amoor.w.aq a1, a2, (a1)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoor.w.aq a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a1, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a2, 16
-; RV32IA-TSO-NEXT:    addi a2, a2, -1
-; RV32IA-TSO-NEXT:    sll a2, a2, a0
-; RV32IA-TSO-NEXT:    amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -11131,6 +14512,28 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoor.w.aq a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
@@ -11153,6 +14556,18 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    li a1, -1
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    li a1, -1
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    li a1, -1
@@ -11181,27 +14596,27 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i16_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a1, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a2, 16
-; RV32IA-WMO-NEXT:    addi a2, a2, -1
-; RV32IA-WMO-NEXT:    sll a2, a2, a0
-; RV32IA-WMO-NEXT:    amoor.w.rl a1, a2, (a1)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoor.w.rl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i16_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a1, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a2, 16
-; RV32IA-TSO-NEXT:    addi a2, a2, -1
-; RV32IA-TSO-NEXT:    sll a2, a2, a0
-; RV32IA-TSO-NEXT:    amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_release:
 ; RV64I:       # %bb.0:
@@ -11237,6 +14652,28 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoor.w.rl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
@@ -11259,6 +14696,18 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    li a1, -1
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    li a1, -1
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    li a1, -1
@@ -11287,27 +14736,27 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a1, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a2, 16
-; RV32IA-WMO-NEXT:    addi a2, a2, -1
-; RV32IA-WMO-NEXT:    sll a2, a2, a0
-; RV32IA-WMO-NEXT:    amoor.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a1, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a2, 16
-; RV32IA-TSO-NEXT:    addi a2, a2, -1
-; RV32IA-TSO-NEXT:    sll a2, a2, a0
-; RV32IA-TSO-NEXT:    amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -11343,6 +14792,28 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
@@ -11365,6 +14836,18 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    li a1, -1
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    li a1, -1
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    li a1, -1
@@ -11393,27 +14876,27 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a1, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a2, 16
-; RV32IA-WMO-NEXT:    addi a2, a2, -1
-; RV32IA-WMO-NEXT:    sll a2, a2, a0
-; RV32IA-WMO-NEXT:    amoor.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a1, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a2, 16
-; RV32IA-TSO-NEXT:    addi a2, a2, -1
-; RV32IA-TSO-NEXT:    sll a2, a2, a0
-; RV32IA-TSO-NEXT:    amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
 ; RV64I:       # %bb.0:
@@ -11449,6 +14932,28 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a1, a0, -4
@@ -11471,6 +14976,18 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    li a1, -1
+; RV32IA-WMO-ZABHA-NEXT:    amoswap.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    li a1, -1
+; RV32IA-TSO-ZABHA-NEXT:    amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    li a1, -1
@@ -11497,26 +15014,26 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_add_i16_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB80_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a2)
-; RV32IA-NEXT:    add a5, a3, a1
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB80_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_add_i16_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB80_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT:    add a5, a3, a1
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB80_1
+; RV32IA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_add_i16_monotonic:
 ; RV64I:       # %bb.0:
@@ -11549,6 +15066,27 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_add_i16_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB80_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT:    add a5, a3, a1
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB80_1
+; RV32IA-ZACAS-NEXT:  # %bb.2:
+; RV32IA-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_add_i16_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -11570,6 +15108,16 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i16_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoadd.h a0, a1, (a0)
@@ -11594,47 +15142,47 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_add_i16_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB81_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT:    add a5, a3, a1
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB81_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    add a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB81_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_add_i16_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB81_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    add a5, a3, a1
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB81_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    add a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB81_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_add_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -11688,6 +15236,48 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    add a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB81_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    add a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB81_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -11730,6 +15320,16 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoadd.h.aq a0, a1, (a0)
@@ -11754,47 +15354,47 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_add_i16_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB82_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w a3, (a2)
-; RV32IA-WMO-NEXT:    add a5, a3, a1
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB82_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    add a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB82_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_add_i16_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB82_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    add a5, a3, a1
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB82_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i16_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    add a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB82_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_add_i16_release:
 ; RV64I:       # %bb.0:
@@ -11848,6 +15448,48 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    add a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB82_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i16_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    add a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB82_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -11890,6 +15532,16 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i16_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoadd.h.rl a0, a1, (a0)
@@ -11914,47 +15566,47 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_add_i16_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB83_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT:    add a5, a3, a1
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB83_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    add a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB83_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_add_i16_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB83_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    add a5, a3, a1
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB83_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    add a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB83_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_add_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -12008,6 +15660,48 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    add a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB83_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    add a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB83_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -12050,6 +15744,16 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoadd.h.aqrl a0, a1, (a0)
@@ -12074,26 +15778,26 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_add_i16_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB84_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a3, (a2)
-; RV32IA-NEXT:    add a5, a3, a1
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB84_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_add_i16_seq_cst:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB84_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT:    add a5, a3, a1
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB84_1
+; RV32IA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_add_i16_seq_cst:
 ; RV64I:       # %bb.0:
@@ -12126,6 +15830,27 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_add_i16_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB84_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT:    add a5, a3, a1
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB84_1
+; RV32IA-ZACAS-NEXT:  # %bb.2:
+; RV32IA-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_add_i16_seq_cst:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -12147,6 +15872,16 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i16_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoadd.h.aqrl a0, a1, (a0)
@@ -12171,26 +15906,26 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_sub_i16_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB85_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a2)
-; RV32IA-NEXT:    sub a5, a3, a1
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB85_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i16_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB85_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT:    sub a5, a3, a1
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB85_1
+; RV32IA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_sub_i16_monotonic:
 ; RV64I:       # %bb.0:
@@ -12223,6 +15958,27 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_sub_i16_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB85_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT:    sub a5, a3, a1
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB85_1
+; RV32IA-ZACAS-NEXT:  # %bb.2:
+; RV32IA-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_sub_i16_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -12244,6 +16000,18 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i16_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    neg a1, a1
@@ -12270,47 +16038,47 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i16_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB86_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT:    sub a5, a3, a1
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB86_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    sub a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB86_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i16_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB86_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    sub a5, a3, a1
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB86_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    sub a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB86_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_sub_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -12364,6 +16132,48 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    sub a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB86_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    sub a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB86_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -12406,6 +16216,18 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    neg a1, a1
@@ -12432,47 +16254,47 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i16_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB87_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w a3, (a2)
-; RV32IA-WMO-NEXT:    sub a5, a3, a1
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB87_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    sub a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB87_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i16_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB87_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    sub a5, a3, a1
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB87_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    sub a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB87_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_sub_i16_release:
 ; RV64I:       # %bb.0:
@@ -12526,6 +16348,48 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    sub a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB87_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    sub a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB87_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -12568,6 +16432,18 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    neg a1, a1
@@ -12594,47 +16470,47 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i16_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB88_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT:    sub a5, a3, a1
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB88_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    sub a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB88_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i16_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB88_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    sub a5, a3, a1
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB88_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    sub a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB88_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_sub_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -12688,6 +16564,48 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    sub a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB88_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    sub a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB88_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -12730,6 +16648,18 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    neg a1, a1
@@ -12756,26 +16686,26 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_sub_i16_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB89_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a3, (a2)
-; RV32IA-NEXT:    sub a5, a3, a1
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB89_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB89_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT:    sub a5, a3, a1
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB89_1
+; RV32IA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_sub_i16_seq_cst:
 ; RV64I:       # %bb.0:
@@ -12808,6 +16738,27 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB89_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT:    sub a5, a3, a1
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB89_1
+; RV32IA-ZACAS-NEXT:  # %bb.2:
+; RV32IA-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_sub_i16_seq_cst:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -12829,6 +16780,18 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT:    amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT:    amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    neg a1, a1
@@ -12855,20 +16818,20 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_and_i16_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    not a3, a4
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    or a1, a1, a3
-; RV32IA-NEXT:    amoand.w a1, a1, (a2)
-; RV32IA-NEXT:    srl a0, a1, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_and_i16_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-NOZACAS-NEXT:    not a3, a4
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-NOZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_and_i16_monotonic:
 ; RV64I:       # %bb.0:
@@ -12895,6 +16858,21 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_and_i16_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-ZACAS-NEXT:    not a3, a4
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-ZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_and_i16_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -12910,6 +16888,16 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoand.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i16_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoand.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoand.h a0, a1, (a0)
@@ -12934,35 +16922,35 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_and_i16_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    not a3, a4
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    or a1, a1, a3
-; RV32IA-WMO-NEXT:    amoand.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    not a3, a4
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    amoand.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_and_i16_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    not a3, a4
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    or a1, a1, a3
-; RV32IA-TSO-NEXT:    amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    not a3, a4
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_and_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -13004,6 +16992,36 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    not a3, a4
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    amoand.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    not a3, a4
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -13034,6 +17052,16 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoand.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoand.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoand.h.aq a0, a1, (a0)
@@ -13058,35 +17086,35 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_and_i16_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    not a3, a4
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    or a1, a1, a3
-; RV32IA-WMO-NEXT:    amoand.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    not a3, a4
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    amoand.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_and_i16_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    not a3, a4
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    or a1, a1, a3
-; RV32IA-TSO-NEXT:    amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    not a3, a4
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_and_i16_release:
 ; RV64I:       # %bb.0:
@@ -13128,6 +17156,36 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    not a3, a4
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    amoand.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    not a3, a4
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -13158,6 +17216,16 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoand.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i16_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoand.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoand.h.rl a0, a1, (a0)
@@ -13182,35 +17250,35 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_and_i16_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    not a3, a4
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    or a1, a1, a3
-; RV32IA-WMO-NEXT:    amoand.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    not a3, a4
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_and_i16_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    not a3, a4
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    or a1, a1, a3
-; RV32IA-TSO-NEXT:    amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    not a3, a4
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_and_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -13252,6 +17320,36 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    not a3, a4
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    not a3, a4
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -13282,6 +17380,16 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoand.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoand.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoand.h.aqrl a0, a1, (a0)
@@ -13306,35 +17414,35 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_and_i16_seq_cst:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    not a3, a4
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    or a1, a1, a3
-; RV32IA-WMO-NEXT:    amoand.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    not a3, a4
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_and_i16_seq_cst:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    not a3, a4
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    or a1, a1, a3
-; RV32IA-TSO-NEXT:    amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    not a3, a4
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_and_i16_seq_cst:
 ; RV64I:       # %bb.0:
@@ -13376,6 +17484,36 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    not a3, a4
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    not a3, a4
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_seq_cst:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -13406,6 +17544,16 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoand.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoand.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoand.h.aqrl a0, a1, (a0)
@@ -13430,27 +17578,27 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_nand_i16_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB95_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a2)
-; RV32IA-NEXT:    and a5, a3, a1
-; RV32IA-NEXT:    not a5, a5
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB95_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB95_1
+; RV32IA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i16_monotonic:
 ; RV64I:       # %bb.0:
@@ -13484,6 +17632,28 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT:    and a5, a3, a1
+; RV32IA-ZACAS-NEXT:    not a5, a5
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB95_1
+; RV32IA-ZACAS-NEXT:  # %bb.2:
+; RV32IA-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -13506,6 +17676,50 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-WMO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB95_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-TSO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB95_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic:
 ; RV64IA-WMO-ZABHA-NOZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
@@ -13550,6 +17764,36 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-WMO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    lhu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  .LBB95_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    slli a4, a0, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    amocas.h a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    srai a4, a4, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB95_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-TSO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    lhu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  .LBB95_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    slli a4, a0, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    amocas.h a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    srai a4, a4, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB95_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic:
 ; RV64IA-WMO-ZABHA-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
@@ -13594,49 +17838,49 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i16_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT:    and a5, a3, a1
-; RV32IA-WMO-NEXT:    not a5, a5
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB96_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB96_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i16_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    and a5, a3, a1
-; RV32IA-TSO-NEXT:    not a5, a5
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB96_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB96_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -13692,6 +17936,50 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB96_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB96_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -13736,6 +18024,50 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-WMO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB96_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-TSO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB96_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
 ; RV64IA-WMO-ZABHA-NOZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
@@ -13780,6 +18112,36 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-WMO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    lhu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  .LBB96_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    slli a4, a0, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    amocas.h.aq a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    srai a4, a4, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB96_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-TSO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    lhu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  .LBB96_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    slli a4, a0, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    amocas.h a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    srai a4, a4, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB96_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
 ; RV64IA-WMO-ZABHA-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
@@ -13824,49 +18186,49 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i16_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w a3, (a2)
-; RV32IA-WMO-NEXT:    and a5, a3, a1
-; RV32IA-WMO-NEXT:    not a5, a5
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB97_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB97_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i16_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    and a5, a3, a1
-; RV32IA-TSO-NEXT:    not a5, a5
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB97_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB97_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i16_release:
 ; RV64I:       # %bb.0:
@@ -13922,6 +18284,50 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB97_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB97_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -13966,6 +18372,50 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-WMO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB97_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-TSO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB97_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_release:
 ; RV64IA-WMO-ZABHA-NOZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
@@ -14010,6 +18460,36 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-WMO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    lhu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  .LBB97_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    slli a4, a0, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    amocas.h.rl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    srai a4, a4, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB97_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-TSO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    lhu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  .LBB97_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    slli a4, a0, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    amocas.h a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    srai a4, a4, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB97_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_release:
 ; RV64IA-WMO-ZABHA-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
@@ -14054,49 +18534,49 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i16_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT:    and a5, a3, a1
-; RV32IA-WMO-NEXT:    not a5, a5
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB98_1
-; RV32IA-WMO-NEXT:  # %bb.2:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB98_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i16_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    and a5, a3, a1
-; RV32IA-TSO-NEXT:    not a5, a5
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB98_1
-; RV32IA-TSO-NEXT:  # %bb.2:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB98_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -14152,6 +18632,50 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB98_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB98_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -14196,6 +18720,50 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-WMO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB98_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-TSO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB98_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
 ; RV64IA-WMO-ZABHA-NOZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
@@ -14240,6 +18808,36 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-WMO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    lhu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  .LBB98_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    slli a4, a0, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    amocas.h.aqrl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    srai a4, a4, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB98_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-TSO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    lhu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  .LBB98_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    slli a4, a0, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    amocas.h a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    srai a4, a4, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB98_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
 ; RV64IA-WMO-ZABHA-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
@@ -14284,27 +18882,27 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_nand_i16_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB99_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a3, (a2)
-; RV32IA-NEXT:    and a5, a3, a1
-; RV32IA-NEXT:    not a5, a5
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB99_1
-; RV32IA-NEXT:  # %bb.2:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB99_1
+; RV32IA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_nand_i16_seq_cst:
 ; RV64I:       # %bb.0:
@@ -14338,6 +18936,28 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT:    and a5, a3, a1
+; RV32IA-ZACAS-NEXT:    not a5, a5
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB99_1
+; RV32IA-ZACAS-NEXT:  # %bb.2:
+; RV32IA-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -14360,6 +18980,50 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-WMO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lr.w.aqrl a3, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB99_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-TSO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lr.w.aqrl a3, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a3, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    bnez a5, .LBB99_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
 ; RV64IA-WMO-ZABHA-NOZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT:    andi a2, a0, -4
@@ -14404,6 +19068,38 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-WMO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    lhu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  .LBB99_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    fence rw, rw
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    slli a4, a0, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    amocas.h.aqrl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    srai a4, a4, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB99_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-TSO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    lhu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  .LBB99_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    fence rw, rw
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    slli a4, a0, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    amocas.h a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    srai a4, a4, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    bne a0, a4, .LBB99_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
 ; RV64IA-WMO-ZABHA-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
@@ -14450,16 +19146,16 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_or_i16_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    slli a1, a1, 16
-; RV32IA-NEXT:    srli a1, a1, 16
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    amoor.w a1, a1, (a2)
-; RV32IA-NEXT:    srl a0, a1, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_or_i16_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_or_i16_monotonic:
 ; RV64I:       # %bb.0:
@@ -14482,6 +19178,17 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_or_i16_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_or_i16_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -14493,6 +19200,16 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoor.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i16_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoor.h a0, a1, (a0)
@@ -14517,27 +19234,27 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_or_i16_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    slli a1, a1, 16
-; RV32IA-WMO-NEXT:    srli a1, a1, 16
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    amoor.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoor.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_or_i16_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    slli a1, a1, 16
-; RV32IA-TSO-NEXT:    srli a1, a1, 16
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_or_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -14571,6 +19288,28 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoor.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -14593,6 +19332,16 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoor.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoor.h.aq a0, a1, (a0)
@@ -14617,27 +19366,27 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_or_i16_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    slli a1, a1, 16
-; RV32IA-WMO-NEXT:    srli a1, a1, 16
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    amoor.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoor.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_or_i16_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    slli a1, a1, 16
-; RV32IA-TSO-NEXT:    srli a1, a1, 16
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_or_i16_release:
 ; RV64I:       # %bb.0:
@@ -14671,6 +19420,28 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoor.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -14693,6 +19464,16 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoor.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i16_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoor.h.rl a0, a1, (a0)
@@ -14717,27 +19498,27 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_or_i16_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    slli a1, a1, 16
-; RV32IA-WMO-NEXT:    srli a1, a1, 16
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    amoor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_or_i16_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    slli a1, a1, 16
-; RV32IA-TSO-NEXT:    srli a1, a1, 16
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_or_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -14771,6 +19552,28 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -14793,6 +19596,16 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoor.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoor.h.aqrl a0, a1, (a0)
@@ -14817,27 +19630,27 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_or_i16_seq_cst:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    slli a1, a1, 16
-; RV32IA-WMO-NEXT:    srli a1, a1, 16
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    amoor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_or_i16_seq_cst:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    slli a1, a1, 16
-; RV32IA-TSO-NEXT:    srli a1, a1, 16
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_or_i16_seq_cst:
 ; RV64I:       # %bb.0:
@@ -14871,6 +19684,28 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_seq_cst:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -14893,6 +19728,16 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoor.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoor.h.aqrl a0, a1, (a0)
@@ -14917,16 +19762,16 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_xor_i16_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    slli a1, a1, 16
-; RV32IA-NEXT:    srli a1, a1, 16
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    amoxor.w a1, a1, (a2)
-; RV32IA-NEXT:    srl a0, a1, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xor_i16_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xor_i16_monotonic:
 ; RV64I:       # %bb.0:
@@ -14949,6 +19794,17 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_xor_i16_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_xor_i16_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -14960,6 +19816,16 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoxor.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i16_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoxor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoxor.h a0, a1, (a0)
@@ -14984,27 +19850,27 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i16_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    slli a1, a1, 16
-; RV32IA-WMO-NEXT:    srli a1, a1, 16
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    amoxor.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoxor.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i16_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    slli a1, a1, 16
-; RV32IA-TSO-NEXT:    srli a1, a1, 16
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xor_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -15038,6 +19904,28 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoxor.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -15060,6 +19948,16 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoxor.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoxor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoxor.h.aq a0, a1, (a0)
@@ -15084,27 +19982,27 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i16_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    slli a1, a1, 16
-; RV32IA-WMO-NEXT:    srli a1, a1, 16
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    amoxor.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoxor.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i16_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    slli a1, a1, 16
-; RV32IA-TSO-NEXT:    srli a1, a1, 16
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xor_i16_release:
 ; RV64I:       # %bb.0:
@@ -15138,6 +20036,28 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoxor.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -15160,6 +20080,16 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoxor.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoxor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoxor.h.rl a0, a1, (a0)
@@ -15184,27 +20114,27 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i16_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    slli a1, a1, 16
-; RV32IA-WMO-NEXT:    srli a1, a1, 16
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    amoxor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i16_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    slli a1, a1, 16
-; RV32IA-TSO-NEXT:    srli a1, a1, 16
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xor_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -15238,6 +20168,28 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -15260,6 +20212,16 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoxor.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoxor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoxor.h.aqrl a0, a1, (a0)
@@ -15284,27 +20246,27 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i16_seq_cst:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    slli a1, a1, 16
-; RV32IA-WMO-NEXT:    srli a1, a1, 16
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    amoxor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT:    srl a0, a1, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i16_seq_cst:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    slli a1, a1, 16
-; RV32IA-TSO-NEXT:    srli a1, a1, 16
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT:    srl a0, a1, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_xor_i16_seq_cst:
 ; RV64I:       # %bb.0:
@@ -15338,6 +20300,28 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -15360,6 +20344,16 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a1, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amoxor.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amoxor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amoxor.h.aqrl a0, a1, (a0)
@@ -15416,36 +20410,36 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_max_i16_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    slli a1, a1, 16
-; RV32IA-NEXT:    li a4, 16
-; RV32IA-NEXT:    andi a5, a0, 24
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    srai a1, a1, 16
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    sub a4, a4, a5
-; RV32IA-NEXT:  .LBB110_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a2)
-; RV32IA-NEXT:    and a7, a5, a3
-; RV32IA-NEXT:    mv a6, a5
-; RV32IA-NEXT:    sll a7, a7, a4
-; RV32IA-NEXT:    sra a7, a7, a4
-; RV32IA-NEXT:    bge a7, a1, .LBB110_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB110_1 Depth=1
-; RV32IA-NEXT:    xor a6, a5, a1
-; RV32IA-NEXT:    and a6, a6, a3
-; RV32IA-NEXT:    xor a6, a5, a6
-; RV32IA-NEXT:  .LBB110_3: # in Loop: Header=BB110_1 Depth=1
-; RV32IA-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB110_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_max_i16_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT:    li a4, 16
+; RV32IA-NOZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-NOZACAS-NEXT:  .LBB110_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT:    bge a7, a1, .LBB110_3
+; RV32IA-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB110_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT:  .LBB110_3: # in Loop: Header=BB110_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a6, .LBB110_1
+; RV32IA-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i16_monotonic:
 ; RV64I:       # %bb.0:
@@ -15520,6 +20514,37 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_max_i16_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-ZACAS-NEXT:    li a4, 16
+; RV32IA-ZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-ZACAS-NEXT:  .LBB110_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a5
+; RV32IA-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-ZACAS-NEXT:    bge a7, a1, .LBB110_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB110_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-ZACAS-NEXT:  .LBB110_3: # in Loop: Header=BB110_1 Depth=1
+; RV32IA-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a6, .LBB110_1
+; RV32IA-ZACAS-NEXT:  # %bb.4:
+; RV32IA-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_max_i16_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -15551,6 +20576,16 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomax.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i16_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomax.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomax.h a0, a1, (a0)
@@ -15607,67 +20642,67 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_max_i16_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    slli a1, a1, 16
-; RV32IA-WMO-NEXT:    li a4, 16
-; RV32IA-WMO-NEXT:    andi a5, a0, 24
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    srai a1, a1, 16
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    sub a4, a4, a5
-; RV32IA-WMO-NEXT:  .LBB111_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT:    and a7, a5, a3
-; RV32IA-WMO-NEXT:    mv a6, a5
-; RV32IA-WMO-NEXT:    sll a7, a7, a4
-; RV32IA-WMO-NEXT:    sra a7, a7, a4
-; RV32IA-WMO-NEXT:    bge a7, a1, .LBB111_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB111_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a6, a5, a1
-; RV32IA-WMO-NEXT:    and a6, a6, a3
-; RV32IA-WMO-NEXT:    xor a6, a5, a6
-; RV32IA-WMO-NEXT:  .LBB111_3: # in Loop: Header=BB111_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-WMO-NEXT:    bnez a6, .LBB111_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a5, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bge a7, a1, .LBB111_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a6, .LBB111_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_max_i16_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    slli a1, a1, 16
-; RV32IA-TSO-NEXT:    li a4, 16
-; RV32IA-TSO-NEXT:    andi a5, a0, 24
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    srai a1, a1, 16
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    sub a4, a4, a5
-; RV32IA-TSO-NEXT:  .LBB111_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a5, (a2)
-; RV32IA-TSO-NEXT:    and a7, a5, a3
-; RV32IA-TSO-NEXT:    mv a6, a5
-; RV32IA-TSO-NEXT:    sll a7, a7, a4
-; RV32IA-TSO-NEXT:    sra a7, a7, a4
-; RV32IA-TSO-NEXT:    bge a7, a1, .LBB111_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB111_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a6, a5, a1
-; RV32IA-TSO-NEXT:    and a6, a6, a3
-; RV32IA-TSO-NEXT:    xor a6, a5, a6
-; RV32IA-TSO-NEXT:  .LBB111_3: # in Loop: Header=BB111_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT:    bnez a6, .LBB111_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a5, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bge a7, a1, .LBB111_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a6, .LBB111_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -15773,6 +20808,68 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    li a4, 16
+; RV32IA-WMO-ZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    bge a7, a1, .LBB111_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT:  .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a6, .LBB111_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    li a4, 16
+; RV32IA-TSO-ZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    bge a7, a1, .LBB111_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT:  .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a6, .LBB111_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -15835,6 +20932,16 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomax.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomax.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomax.h.aq a0, a1, (a0)
@@ -15891,67 +20998,67 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_max_i16_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    slli a1, a1, 16
-; RV32IA-WMO-NEXT:    li a4, 16
-; RV32IA-WMO-NEXT:    andi a5, a0, 24
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    srai a1, a1, 16
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    sub a4, a4, a5
-; RV32IA-WMO-NEXT:  .LBB112_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w a5, (a2)
-; RV32IA-WMO-NEXT:    and a7, a5, a3
-; RV32IA-WMO-NEXT:    mv a6, a5
-; RV32IA-WMO-NEXT:    sll a7, a7, a4
-; RV32IA-WMO-NEXT:    sra a7, a7, a4
-; RV32IA-WMO-NEXT:    bge a7, a1, .LBB112_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB112_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a6, a5, a1
-; RV32IA-WMO-NEXT:    and a6, a6, a3
-; RV32IA-WMO-NEXT:    xor a6, a5, a6
-; RV32IA-WMO-NEXT:  .LBB112_3: # in Loop: Header=BB112_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT:    bnez a6, .LBB112_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a5, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bge a7, a1, .LBB112_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a6, .LBB112_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_max_i16_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    slli a1, a1, 16
-; RV32IA-TSO-NEXT:    li a4, 16
-; RV32IA-TSO-NEXT:    andi a5, a0, 24
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    srai a1, a1, 16
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    sub a4, a4, a5
-; RV32IA-TSO-NEXT:  .LBB112_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a5, (a2)
-; RV32IA-TSO-NEXT:    and a7, a5, a3
-; RV32IA-TSO-NEXT:    mv a6, a5
-; RV32IA-TSO-NEXT:    sll a7, a7, a4
-; RV32IA-TSO-NEXT:    sra a7, a7, a4
-; RV32IA-TSO-NEXT:    bge a7, a1, .LBB112_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB112_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a6, a5, a1
-; RV32IA-TSO-NEXT:    and a6, a6, a3
-; RV32IA-TSO-NEXT:    xor a6, a5, a6
-; RV32IA-TSO-NEXT:  .LBB112_3: # in Loop: Header=BB112_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT:    bnez a6, .LBB112_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a5, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i16_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bge a7, a1, .LBB112_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a6, .LBB112_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i16_release:
 ; RV64I:       # %bb.0:
@@ -16057,6 +21164,68 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    li a4, 16
+; RV32IA-WMO-ZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    bge a7, a1, .LBB112_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT:  .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a6, .LBB112_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i16_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    li a4, 16
+; RV32IA-TSO-ZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    bge a7, a1, .LBB112_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT:  .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a6, .LBB112_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -16119,6 +21288,16 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomax.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i16_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomax.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomax.h.rl a0, a1, (a0)
@@ -16175,67 +21354,67 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_max_i16_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    slli a1, a1, 16
-; RV32IA-WMO-NEXT:    li a4, 16
-; RV32IA-WMO-NEXT:    andi a5, a0, 24
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    srai a1, a1, 16
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    sub a4, a4, a5
-; RV32IA-WMO-NEXT:  .LBB113_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT:    and a7, a5, a3
-; RV32IA-WMO-NEXT:    mv a6, a5
-; RV32IA-WMO-NEXT:    sll a7, a7, a4
-; RV32IA-WMO-NEXT:    sra a7, a7, a4
-; RV32IA-WMO-NEXT:    bge a7, a1, .LBB113_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB113_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a6, a5, a1
-; RV32IA-WMO-NEXT:    and a6, a6, a3
-; RV32IA-WMO-NEXT:    xor a6, a5, a6
-; RV32IA-WMO-NEXT:  .LBB113_3: # in Loop: Header=BB113_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT:    bnez a6, .LBB113_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a5, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bge a7, a1, .LBB113_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a6, .LBB113_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_max_i16_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    slli a1, a1, 16
-; RV32IA-TSO-NEXT:    li a4, 16
-; RV32IA-TSO-NEXT:    andi a5, a0, 24
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    srai a1, a1, 16
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    sub a4, a4, a5
-; RV32IA-TSO-NEXT:  .LBB113_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a5, (a2)
-; RV32IA-TSO-NEXT:    and a7, a5, a3
-; RV32IA-TSO-NEXT:    mv a6, a5
-; RV32IA-TSO-NEXT:    sll a7, a7, a4
-; RV32IA-TSO-NEXT:    sra a7, a7, a4
-; RV32IA-TSO-NEXT:    bge a7, a1, .LBB113_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB113_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a6, a5, a1
-; RV32IA-TSO-NEXT:    and a6, a6, a3
-; RV32IA-TSO-NEXT:    xor a6, a5, a6
-; RV32IA-TSO-NEXT:  .LBB113_3: # in Loop: Header=BB113_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT:    bnez a6, .LBB113_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a5, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bge a7, a1, .LBB113_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a6, .LBB113_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -16341,6 +21520,68 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    li a4, 16
+; RV32IA-WMO-ZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    bge a7, a1, .LBB113_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT:  .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a6, .LBB113_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    li a4, 16
+; RV32IA-TSO-ZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    bge a7, a1, .LBB113_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT:  .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a6, .LBB113_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -16403,6 +21644,16 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomax.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomax.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomax.h.aqrl a0, a1, (a0)
@@ -16459,36 +21710,36 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_max_i16_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    slli a1, a1, 16
-; RV32IA-NEXT:    li a4, 16
-; RV32IA-NEXT:    andi a5, a0, 24
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    srai a1, a1, 16
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    sub a4, a4, a5
-; RV32IA-NEXT:  .LBB114_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a5, (a2)
-; RV32IA-NEXT:    and a7, a5, a3
-; RV32IA-NEXT:    mv a6, a5
-; RV32IA-NEXT:    sll a7, a7, a4
-; RV32IA-NEXT:    sra a7, a7, a4
-; RV32IA-NEXT:    bge a7, a1, .LBB114_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB114_1 Depth=1
-; RV32IA-NEXT:    xor a6, a5, a1
-; RV32IA-NEXT:    and a6, a6, a3
-; RV32IA-NEXT:    xor a6, a5, a6
-; RV32IA-NEXT:  .LBB114_3: # in Loop: Header=BB114_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB114_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_max_i16_seq_cst:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT:    li a4, 16
+; RV32IA-NOZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-NOZACAS-NEXT:  .LBB114_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w.aqrl a5, (a2)
+; RV32IA-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT:    bge a7, a1, .LBB114_3
+; RV32IA-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB114_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT:  .LBB114_3: # in Loop: Header=BB114_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a6, .LBB114_1
+; RV32IA-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i16_seq_cst:
 ; RV64I:       # %bb.0:
@@ -16563,6 +21814,37 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_max_i16_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-ZACAS-NEXT:    li a4, 16
+; RV32IA-ZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-ZACAS-NEXT:  .LBB114_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a5, (a2)
+; RV32IA-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a5
+; RV32IA-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-ZACAS-NEXT:    bge a7, a1, .LBB114_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB114_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-ZACAS-NEXT:  .LBB114_3: # in Loop: Header=BB114_1 Depth=1
+; RV32IA-ZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a6, .LBB114_1
+; RV32IA-ZACAS-NEXT:  # %bb.4:
+; RV32IA-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_max_i16_seq_cst:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -16594,6 +21876,16 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomax.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i16_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomax.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomax.h.aqrl a0, a1, (a0)
@@ -16650,36 +21942,36 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_min_i16_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    slli a1, a1, 16
-; RV32IA-NEXT:    li a4, 16
-; RV32IA-NEXT:    andi a5, a0, 24
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    srai a1, a1, 16
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    sub a4, a4, a5
-; RV32IA-NEXT:  .LBB115_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a2)
-; RV32IA-NEXT:    and a7, a5, a3
-; RV32IA-NEXT:    mv a6, a5
-; RV32IA-NEXT:    sll a7, a7, a4
-; RV32IA-NEXT:    sra a7, a7, a4
-; RV32IA-NEXT:    bge a1, a7, .LBB115_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB115_1 Depth=1
-; RV32IA-NEXT:    xor a6, a5, a1
-; RV32IA-NEXT:    and a6, a6, a3
-; RV32IA-NEXT:    xor a6, a5, a6
-; RV32IA-NEXT:  .LBB115_3: # in Loop: Header=BB115_1 Depth=1
-; RV32IA-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB115_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_min_i16_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT:    li a4, 16
+; RV32IA-NOZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-NOZACAS-NEXT:  .LBB115_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT:    bge a1, a7, .LBB115_3
+; RV32IA-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB115_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT:  .LBB115_3: # in Loop: Header=BB115_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a6, .LBB115_1
+; RV32IA-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i16_monotonic:
 ; RV64I:       # %bb.0:
@@ -16754,6 +22046,37 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_min_i16_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-ZACAS-NEXT:    li a4, 16
+; RV32IA-ZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-ZACAS-NEXT:  .LBB115_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a5
+; RV32IA-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-ZACAS-NEXT:    bge a1, a7, .LBB115_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB115_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-ZACAS-NEXT:  .LBB115_3: # in Loop: Header=BB115_1 Depth=1
+; RV32IA-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a6, .LBB115_1
+; RV32IA-ZACAS-NEXT:  # %bb.4:
+; RV32IA-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_min_i16_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -16785,6 +22108,16 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomin.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i16_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomin.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomin.h a0, a1, (a0)
@@ -16841,67 +22174,67 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_min_i16_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    slli a1, a1, 16
-; RV32IA-WMO-NEXT:    li a4, 16
-; RV32IA-WMO-NEXT:    andi a5, a0, 24
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    srai a1, a1, 16
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    sub a4, a4, a5
-; RV32IA-WMO-NEXT:  .LBB116_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT:    and a7, a5, a3
-; RV32IA-WMO-NEXT:    mv a6, a5
-; RV32IA-WMO-NEXT:    sll a7, a7, a4
-; RV32IA-WMO-NEXT:    sra a7, a7, a4
-; RV32IA-WMO-NEXT:    bge a1, a7, .LBB116_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB116_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a6, a5, a1
-; RV32IA-WMO-NEXT:    and a6, a6, a3
-; RV32IA-WMO-NEXT:    xor a6, a5, a6
-; RV32IA-WMO-NEXT:  .LBB116_3: # in Loop: Header=BB116_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-WMO-NEXT:    bnez a6, .LBB116_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a5, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bge a1, a7, .LBB116_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a6, .LBB116_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_min_i16_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    slli a1, a1, 16
-; RV32IA-TSO-NEXT:    li a4, 16
-; RV32IA-TSO-NEXT:    andi a5, a0, 24
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    srai a1, a1, 16
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    sub a4, a4, a5
-; RV32IA-TSO-NEXT:  .LBB116_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a5, (a2)
-; RV32IA-TSO-NEXT:    and a7, a5, a3
-; RV32IA-TSO-NEXT:    mv a6, a5
-; RV32IA-TSO-NEXT:    sll a7, a7, a4
-; RV32IA-TSO-NEXT:    sra a7, a7, a4
-; RV32IA-TSO-NEXT:    bge a1, a7, .LBB116_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB116_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a6, a5, a1
-; RV32IA-TSO-NEXT:    and a6, a6, a3
-; RV32IA-TSO-NEXT:    xor a6, a5, a6
-; RV32IA-TSO-NEXT:  .LBB116_3: # in Loop: Header=BB116_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT:    bnez a6, .LBB116_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a5, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bge a1, a7, .LBB116_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a6, .LBB116_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -17007,6 +22340,68 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    li a4, 16
+; RV32IA-WMO-ZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    bge a1, a7, .LBB116_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT:  .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a6, .LBB116_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    li a4, 16
+; RV32IA-TSO-ZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    bge a1, a7, .LBB116_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT:  .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a6, .LBB116_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -17069,6 +22464,16 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomin.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomin.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomin.h.aq a0, a1, (a0)
@@ -17125,67 +22530,67 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_min_i16_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    slli a1, a1, 16
-; RV32IA-WMO-NEXT:    li a4, 16
-; RV32IA-WMO-NEXT:    andi a5, a0, 24
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    srai a1, a1, 16
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    sub a4, a4, a5
-; RV32IA-WMO-NEXT:  .LBB117_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w a5, (a2)
-; RV32IA-WMO-NEXT:    and a7, a5, a3
-; RV32IA-WMO-NEXT:    mv a6, a5
-; RV32IA-WMO-NEXT:    sll a7, a7, a4
-; RV32IA-WMO-NEXT:    sra a7, a7, a4
-; RV32IA-WMO-NEXT:    bge a1, a7, .LBB117_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB117_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a6, a5, a1
-; RV32IA-WMO-NEXT:    and a6, a6, a3
-; RV32IA-WMO-NEXT:    xor a6, a5, a6
-; RV32IA-WMO-NEXT:  .LBB117_3: # in Loop: Header=BB117_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT:    bnez a6, .LBB117_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a5, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bge a1, a7, .LBB117_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a6, .LBB117_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_min_i16_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    slli a1, a1, 16
-; RV32IA-TSO-NEXT:    li a4, 16
-; RV32IA-TSO-NEXT:    andi a5, a0, 24
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    srai a1, a1, 16
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    sub a4, a4, a5
-; RV32IA-TSO-NEXT:  .LBB117_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a5, (a2)
-; RV32IA-TSO-NEXT:    and a7, a5, a3
-; RV32IA-TSO-NEXT:    mv a6, a5
-; RV32IA-TSO-NEXT:    sll a7, a7, a4
-; RV32IA-TSO-NEXT:    sra a7, a7, a4
-; RV32IA-TSO-NEXT:    bge a1, a7, .LBB117_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB117_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a6, a5, a1
-; RV32IA-TSO-NEXT:    and a6, a6, a3
-; RV32IA-TSO-NEXT:    xor a6, a5, a6
-; RV32IA-TSO-NEXT:  .LBB117_3: # in Loop: Header=BB117_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT:    bnez a6, .LBB117_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a5, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i16_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bge a1, a7, .LBB117_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a6, .LBB117_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i16_release:
 ; RV64I:       # %bb.0:
@@ -17291,6 +22696,68 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    li a4, 16
+; RV32IA-WMO-ZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    bge a1, a7, .LBB117_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT:  .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a6, .LBB117_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i16_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    li a4, 16
+; RV32IA-TSO-ZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    bge a1, a7, .LBB117_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT:  .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a6, .LBB117_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -17353,6 +22820,16 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomin.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i16_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomin.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomin.h.rl a0, a1, (a0)
@@ -17409,67 +22886,67 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_min_i16_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    slli a1, a1, 16
-; RV32IA-WMO-NEXT:    li a4, 16
-; RV32IA-WMO-NEXT:    andi a5, a0, 24
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    srai a1, a1, 16
-; RV32IA-WMO-NEXT:    sll a3, a3, a0
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:    sub a4, a4, a5
-; RV32IA-WMO-NEXT:  .LBB118_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT:    and a7, a5, a3
-; RV32IA-WMO-NEXT:    mv a6, a5
-; RV32IA-WMO-NEXT:    sll a7, a7, a4
-; RV32IA-WMO-NEXT:    sra a7, a7, a4
-; RV32IA-WMO-NEXT:    bge a1, a7, .LBB118_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB118_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a6, a5, a1
-; RV32IA-WMO-NEXT:    and a6, a6, a3
-; RV32IA-WMO-NEXT:    xor a6, a5, a6
-; RV32IA-WMO-NEXT:  .LBB118_3: # in Loop: Header=BB118_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT:    bnez a6, .LBB118_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a5, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT:    bge a1, a7, .LBB118_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a6, .LBB118_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_min_i16_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    slli a1, a1, 16
-; RV32IA-TSO-NEXT:    li a4, 16
-; RV32IA-TSO-NEXT:    andi a5, a0, 24
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    srai a1, a1, 16
-; RV32IA-TSO-NEXT:    sll a3, a3, a0
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:    sub a4, a4, a5
-; RV32IA-TSO-NEXT:  .LBB118_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a5, (a2)
-; RV32IA-TSO-NEXT:    and a7, a5, a3
-; RV32IA-TSO-NEXT:    mv a6, a5
-; RV32IA-TSO-NEXT:    sll a7, a7, a4
-; RV32IA-TSO-NEXT:    sra a7, a7, a4
-; RV32IA-TSO-NEXT:    bge a1, a7, .LBB118_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB118_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a6, a5, a1
-; RV32IA-TSO-NEXT:    and a6, a6, a3
-; RV32IA-TSO-NEXT:    xor a6, a5, a6
-; RV32IA-TSO-NEXT:  .LBB118_3: # in Loop: Header=BB118_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT:    bnez a6, .LBB118_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a5, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT:    bge a1, a7, .LBB118_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a6, .LBB118_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -17575,6 +23052,68 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    li a4, 16
+; RV32IA-WMO-ZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT:    bge a1, a7, .LBB118_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT:  .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a6, .LBB118_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    li a4, 16
+; RV32IA-TSO-ZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT:    bge a1, a7, .LBB118_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT:  .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a6, .LBB118_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -17637,6 +23176,16 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomin.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomin.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomin.h.aqrl a0, a1, (a0)
@@ -17693,36 +23242,36 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_min_i16_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    slli a1, a1, 16
-; RV32IA-NEXT:    li a4, 16
-; RV32IA-NEXT:    andi a5, a0, 24
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    srai a1, a1, 16
-; RV32IA-NEXT:    sll a3, a3, a0
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:    sub a4, a4, a5
-; RV32IA-NEXT:  .LBB119_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a5, (a2)
-; RV32IA-NEXT:    and a7, a5, a3
-; RV32IA-NEXT:    mv a6, a5
-; RV32IA-NEXT:    sll a7, a7, a4
-; RV32IA-NEXT:    sra a7, a7, a4
-; RV32IA-NEXT:    bge a1, a7, .LBB119_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB119_1 Depth=1
-; RV32IA-NEXT:    xor a6, a5, a1
-; RV32IA-NEXT:    and a6, a6, a3
-; RV32IA-NEXT:    xor a6, a5, a6
-; RV32IA-NEXT:  .LBB119_3: # in Loop: Header=BB119_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT:    bnez a6, .LBB119_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a5, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_min_i16_seq_cst:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT:    li a4, 16
+; RV32IA-NOZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-NOZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-NOZACAS-NEXT:  .LBB119_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w.aqrl a5, (a2)
+; RV32IA-NOZACAS-NEXT:    and a7, a5, a3
+; RV32IA-NOZACAS-NEXT:    mv a6, a5
+; RV32IA-NOZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT:    bge a1, a7, .LBB119_3
+; RV32IA-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB119_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT:    and a6, a6, a3
+; RV32IA-NOZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT:  .LBB119_3: # in Loop: Header=BB119_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a6, .LBB119_1
+; RV32IA-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-NOZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_min_i16_seq_cst:
 ; RV64I:       # %bb.0:
@@ -17797,6 +23346,37 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_min_i16_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    slli a1, a1, 16
+; RV32IA-ZACAS-NEXT:    li a4, 16
+; RV32IA-ZACAS-NEXT:    andi a5, a0, 24
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    srai a1, a1, 16
+; RV32IA-ZACAS-NEXT:    sll a3, a3, a0
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:    sub a4, a4, a5
+; RV32IA-ZACAS-NEXT:  .LBB119_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a5, (a2)
+; RV32IA-ZACAS-NEXT:    and a7, a5, a3
+; RV32IA-ZACAS-NEXT:    mv a6, a5
+; RV32IA-ZACAS-NEXT:    sll a7, a7, a4
+; RV32IA-ZACAS-NEXT:    sra a7, a7, a4
+; RV32IA-ZACAS-NEXT:    bge a1, a7, .LBB119_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB119_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a6, a5, a1
+; RV32IA-ZACAS-NEXT:    and a6, a6, a3
+; RV32IA-ZACAS-NEXT:    xor a6, a5, a6
+; RV32IA-ZACAS-NEXT:  .LBB119_3: # in Loop: Header=BB119_1 Depth=1
+; RV32IA-ZACAS-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a6, .LBB119_1
+; RV32IA-ZACAS-NEXT:  # %bb.4:
+; RV32IA-ZACAS-NEXT:    srl a0, a5, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_min_i16_seq_cst:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -17828,6 +23408,16 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a5, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomin.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i16_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomin.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomin.h.aqrl a0, a1, (a0)
@@ -17886,30 +23476,30 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umax_i16_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB120_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a2)
-; RV32IA-NEXT:    and a6, a3, a4
-; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a6, a1, .LBB120_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB120_1 Depth=1
-; RV32IA-NEXT:    xor a5, a3, a1
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:  .LBB120_3: # in Loop: Header=BB120_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB120_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i16_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB120_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT:    and a6, a3, a4
+; RV32IA-NOZACAS-NEXT:    mv a5, a3
+; RV32IA-NOZACAS-NEXT:    bgeu a6, a1, .LBB120_3
+; RV32IA-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB120_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:  .LBB120_3: # in Loop: Header=BB120_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB120_1
+; RV32IA-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umax_i16_monotonic:
 ; RV64I:       # %bb.0:
@@ -17980,6 +23570,31 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_umax_i16_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB120_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT:    and a6, a3, a4
+; RV32IA-ZACAS-NEXT:    mv a5, a3
+; RV32IA-ZACAS-NEXT:    bgeu a6, a1, .LBB120_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB120_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:  .LBB120_3: # in Loop: Header=BB120_1 Depth=1
+; RV32IA-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB120_1
+; RV32IA-ZACAS-NEXT:  # %bb.4:
+; RV32IA-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_umax_i16_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -18005,6 +23620,16 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomaxu.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i16_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomaxu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomaxu.h a0, a1, (a0)
@@ -18063,55 +23688,55 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i16_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB121_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT:    and a6, a3, a4
-; RV32IA-WMO-NEXT:    mv a5, a3
-; RV32IA-WMO-NEXT:    bgeu a6, a1, .LBB121_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB121_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a5, a3, a1
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:  .LBB121_3: # in Loop: Header=BB121_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB121_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    bgeu a6, a1, .LBB121_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB121_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i16_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB121_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    and a6, a3, a4
-; RV32IA-TSO-NEXT:    mv a5, a3
-; RV32IA-TSO-NEXT:    bgeu a6, a1, .LBB121_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB121_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a5, a3, a1
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:  .LBB121_3: # in Loop: Header=BB121_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB121_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    bgeu a6, a1, .LBB121_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB121_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umax_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -18207,6 +23832,56 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    bgeu a6, a1, .LBB121_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB121_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    bgeu a6, a1, .LBB121_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB121_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -18257,6 +23932,16 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomaxu.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomaxu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomaxu.h.aq a0, a1, (a0)
@@ -18315,55 +24000,55 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i16_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB122_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w a3, (a2)
-; RV32IA-WMO-NEXT:    and a6, a3, a4
-; RV32IA-WMO-NEXT:    mv a5, a3
-; RV32IA-WMO-NEXT:    bgeu a6, a1, .LBB122_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB122_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a5, a3, a1
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:  .LBB122_3: # in Loop: Header=BB122_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB122_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    bgeu a6, a1, .LBB122_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB122_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i16_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB122_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    and a6, a3, a4
-; RV32IA-TSO-NEXT:    mv a5, a3
-; RV32IA-TSO-NEXT:    bgeu a6, a1, .LBB122_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB122_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a5, a3, a1
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:  .LBB122_3: # in Loop: Header=BB122_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB122_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    bgeu a6, a1, .LBB122_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB122_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umax_i16_release:
 ; RV64I:       # %bb.0:
@@ -18459,6 +24144,56 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    bgeu a6, a1, .LBB122_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB122_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    bgeu a6, a1, .LBB122_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB122_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -18509,6 +24244,16 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomaxu.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomaxu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomaxu.h.rl a0, a1, (a0)
@@ -18567,55 +24312,55 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i16_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB123_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT:    and a6, a3, a4
-; RV32IA-WMO-NEXT:    mv a5, a3
-; RV32IA-WMO-NEXT:    bgeu a6, a1, .LBB123_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB123_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a5, a3, a1
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:  .LBB123_3: # in Loop: Header=BB123_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB123_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    bgeu a6, a1, .LBB123_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB123_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i16_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB123_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    and a6, a3, a4
-; RV32IA-TSO-NEXT:    mv a5, a3
-; RV32IA-TSO-NEXT:    bgeu a6, a1, .LBB123_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB123_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a5, a3, a1
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:  .LBB123_3: # in Loop: Header=BB123_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB123_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    bgeu a6, a1, .LBB123_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB123_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umax_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -18711,6 +24456,56 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    bgeu a6, a1, .LBB123_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB123_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    bgeu a6, a1, .LBB123_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB123_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -18761,6 +24556,16 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomaxu.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomaxu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomaxu.h.aqrl a0, a1, (a0)
@@ -18819,30 +24624,30 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umax_i16_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB124_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a3, (a2)
-; RV32IA-NEXT:    and a6, a3, a4
-; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a6, a1, .LBB124_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB124_1 Depth=1
-; RV32IA-NEXT:    xor a5, a3, a1
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:  .LBB124_3: # in Loop: Header=BB124_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB124_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i16_seq_cst:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB124_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT:    and a6, a3, a4
+; RV32IA-NOZACAS-NEXT:    mv a5, a3
+; RV32IA-NOZACAS-NEXT:    bgeu a6, a1, .LBB124_3
+; RV32IA-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB124_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:  .LBB124_3: # in Loop: Header=BB124_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB124_1
+; RV32IA-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umax_i16_seq_cst:
 ; RV64I:       # %bb.0:
@@ -18913,6 +24718,31 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_umax_i16_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB124_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT:    and a6, a3, a4
+; RV32IA-ZACAS-NEXT:    mv a5, a3
+; RV32IA-ZACAS-NEXT:    bgeu a6, a1, .LBB124_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB124_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:  .LBB124_3: # in Loop: Header=BB124_1 Depth=1
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB124_1
+; RV32IA-ZACAS-NEXT:  # %bb.4:
+; RV32IA-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_umax_i16_seq_cst:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -18938,6 +24768,16 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amomaxu.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i16_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amomaxu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amomaxu.h.aqrl a0, a1, (a0)
@@ -18996,30 +24836,30 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umin_i16_monotonic:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB125_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a2)
-; RV32IA-NEXT:    and a6, a3, a4
-; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a1, a6, .LBB125_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB125_1 Depth=1
-; RV32IA-NEXT:    xor a5, a3, a1
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:  .LBB125_3: # in Loop: Header=BB125_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB125_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i16_monotonic:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB125_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT:    and a6, a3, a4
+; RV32IA-NOZACAS-NEXT:    mv a5, a3
+; RV32IA-NOZACAS-NEXT:    bgeu a1, a6, .LBB125_3
+; RV32IA-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB125_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:  .LBB125_3: # in Loop: Header=BB125_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB125_1
+; RV32IA-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umin_i16_monotonic:
 ; RV64I:       # %bb.0:
@@ -19090,6 +24930,31 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_umin_i16_monotonic:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB125_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT:    and a6, a3, a4
+; RV32IA-ZACAS-NEXT:    mv a5, a3
+; RV32IA-ZACAS-NEXT:    bgeu a1, a6, .LBB125_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB125_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:  .LBB125_3: # in Loop: Header=BB125_1 Depth=1
+; RV32IA-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB125_1
+; RV32IA-ZACAS-NEXT:  # %bb.4:
+; RV32IA-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_umin_i16_monotonic:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -19115,6 +24980,16 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_monotonic:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amominu.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i16_monotonic:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amominu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_monotonic:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amominu.h a0, a1, (a0)
@@ -19173,55 +25048,55 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i16_acquire:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB126_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT:    and a6, a3, a4
-; RV32IA-WMO-NEXT:    mv a5, a3
-; RV32IA-WMO-NEXT:    bgeu a1, a6, .LBB126_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB126_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a5, a3, a1
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:  .LBB126_3: # in Loop: Header=BB126_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB126_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    bgeu a1, a6, .LBB126_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB126_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i16_acquire:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB126_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    and a6, a3, a4
-; RV32IA-TSO-NEXT:    mv a5, a3
-; RV32IA-TSO-NEXT:    bgeu a1, a6, .LBB126_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB126_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a5, a3, a1
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:  .LBB126_3: # in Loop: Header=BB126_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB126_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    bgeu a1, a6, .LBB126_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB126_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umin_i16_acquire:
 ; RV64I:       # %bb.0:
@@ -19317,6 +25192,56 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    bgeu a1, a6, .LBB126_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB126_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    bgeu a1, a6, .LBB126_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB126_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_acquire:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -19367,6 +25292,16 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amominu.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amominu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_acquire:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amominu.h.aq a0, a1, (a0)
@@ -19425,55 +25360,55 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i16_release:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB127_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w a3, (a2)
-; RV32IA-WMO-NEXT:    and a6, a3, a4
-; RV32IA-WMO-NEXT:    mv a5, a3
-; RV32IA-WMO-NEXT:    bgeu a1, a6, .LBB127_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB127_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a5, a3, a1
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:  .LBB127_3: # in Loop: Header=BB127_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB127_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    bgeu a1, a6, .LBB127_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB127_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i16_release:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB127_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    and a6, a3, a4
-; RV32IA-TSO-NEXT:    mv a5, a3
-; RV32IA-TSO-NEXT:    bgeu a1, a6, .LBB127_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB127_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a5, a3, a1
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:  .LBB127_3: # in Loop: Header=BB127_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB127_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    bgeu a1, a6, .LBB127_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB127_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umin_i16_release:
 ; RV64I:       # %bb.0:
@@ -19569,6 +25504,56 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    bgeu a1, a6, .LBB127_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB127_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    bgeu a1, a6, .LBB127_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB127_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_release:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -19619,6 +25604,16 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amominu.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amominu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_release:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amominu.h.rl a0, a1, (a0)
@@ -19677,55 +25672,55 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i16_acq_rel:
-; RV32IA-WMO:       # %bb.0:
-; RV32IA-WMO-NEXT:    andi a2, a0, -4
-; RV32IA-WMO-NEXT:    slli a0, a0, 3
-; RV32IA-WMO-NEXT:    lui a3, 16
-; RV32IA-WMO-NEXT:    addi a3, a3, -1
-; RV32IA-WMO-NEXT:    sll a4, a3, a0
-; RV32IA-WMO-NEXT:    and a1, a1, a3
-; RV32IA-WMO-NEXT:    sll a1, a1, a0
-; RV32IA-WMO-NEXT:  .LBB128_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT:    lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT:    and a6, a3, a4
-; RV32IA-WMO-NEXT:    mv a5, a3
-; RV32IA-WMO-NEXT:    bgeu a1, a6, .LBB128_3
-; RV32IA-WMO-NEXT:  # %bb.2: # in Loop: Header=BB128_1 Depth=1
-; RV32IA-WMO-NEXT:    xor a5, a3, a1
-; RV32IA-WMO-NEXT:    and a5, a5, a4
-; RV32IA-WMO-NEXT:    xor a5, a3, a5
-; RV32IA-WMO-NEXT:  .LBB128_3: # in Loop: Header=BB128_1 Depth=1
-; RV32IA-WMO-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT:    bnez a5, .LBB128_1
-; RV32IA-WMO-NEXT:  # %bb.4:
-; RV32IA-WMO-NEXT:    srl a0, a3, a0
-; RV32IA-WMO-NEXT:    ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-WMO-NOZACAS:       # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT:    mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT:    bgeu a1, a6, .LBB128_3
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT:  .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT:    bnez a5, .LBB128_1
+; RV32IA-WMO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT:    ret
 ;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i16_acq_rel:
-; RV32IA-TSO:       # %bb.0:
-; RV32IA-TSO-NEXT:    andi a2, a0, -4
-; RV32IA-TSO-NEXT:    slli a0, a0, 3
-; RV32IA-TSO-NEXT:    lui a3, 16
-; RV32IA-TSO-NEXT:    addi a3, a3, -1
-; RV32IA-TSO-NEXT:    sll a4, a3, a0
-; RV32IA-TSO-NEXT:    and a1, a1, a3
-; RV32IA-TSO-NEXT:    sll a1, a1, a0
-; RV32IA-TSO-NEXT:  .LBB128_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT:    lr.w a3, (a2)
-; RV32IA-TSO-NEXT:    and a6, a3, a4
-; RV32IA-TSO-NEXT:    mv a5, a3
-; RV32IA-TSO-NEXT:    bgeu a1, a6, .LBB128_3
-; RV32IA-TSO-NEXT:  # %bb.2: # in Loop: Header=BB128_1 Depth=1
-; RV32IA-TSO-NEXT:    xor a5, a3, a1
-; RV32IA-TSO-NEXT:    and a5, a5, a4
-; RV32IA-TSO-NEXT:    xor a5, a3, a5
-; RV32IA-TSO-NEXT:  .LBB128_3: # in Loop: Header=BB128_1 Depth=1
-; RV32IA-TSO-NEXT:    sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT:    bnez a5, .LBB128_1
-; RV32IA-TSO-NEXT:  # %bb.4:
-; RV32IA-TSO-NEXT:    srl a0, a3, a0
-; RV32IA-TSO-NEXT:    ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-TSO-NOZACAS:       # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT:    mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT:    bgeu a1, a6, .LBB128_3
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT:  .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT:    bnez a5, .LBB128_1
+; RV32IA-TSO-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umin_i16_acq_rel:
 ; RV64I:       # %bb.0:
@@ -19821,6 +25816,56 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-WMO-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT:  .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT:    mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT:    bgeu a1, a6, .LBB128_3
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT:  .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT:    bnez a5, .LBB128_1
+; RV32IA-WMO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-TSO-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT:    lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT:  .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT:    mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT:    bgeu a1, a6, .LBB128_3
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT:  .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT:    sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT:    bnez a5, .LBB128_1
+; RV32IA-TSO-ZACAS-NEXT:  # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
 ; RV64IA-WMO-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZACAS-NEXT:    andi a2, a0, -4
@@ -19871,6 +25916,16 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amominu.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amominu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_acq_rel:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amominu.h.aqrl a0, a1, (a0)
@@ -19929,30 +25984,30 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
-; RV32IA-LABEL: atomicrmw_umin_i16_seq_cst:
-; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a2, a0, -4
-; RV32IA-NEXT:    slli a0, a0, 3
-; RV32IA-NEXT:    lui a3, 16
-; RV32IA-NEXT:    addi a3, a3, -1
-; RV32IA-NEXT:    sll a4, a3, a0
-; RV32IA-NEXT:    and a1, a1, a3
-; RV32IA-NEXT:    sll a1, a1, a0
-; RV32IA-NEXT:  .LBB129_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a3, (a2)
-; RV32IA-NEXT:    and a6, a3, a4
-; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a1, a6, .LBB129_3
-; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB129_1 Depth=1
-; RV32IA-NEXT:    xor a5, a3, a1
-; RV32IA-NEXT:    and a5, a5, a4
-; RV32IA-NEXT:    xor a5, a3, a5
-; RV32IA-NEXT:  .LBB129_3: # in Loop: Header=BB129_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT:    bnez a5, .LBB129_1
-; RV32IA-NEXT:  # %bb.4:
-; RV32IA-NEXT:    srl a0, a3, a0
-; RV32IA-NEXT:    ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i16_seq_cst:
+; RV32IA-NOZACAS:       # %bb.0:
+; RV32IA-NOZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT:    lui a3, 16
+; RV32IA-NOZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT:    and a1, a1, a3
+; RV32IA-NOZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT:  .LBB129_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT:    lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT:    and a6, a3, a4
+; RV32IA-NOZACAS-NEXT:    mv a5, a3
+; RV32IA-NOZACAS-NEXT:    bgeu a1, a6, .LBB129_3
+; RV32IA-NOZACAS-NEXT:  # %bb.2: # in Loop: Header=BB129_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-NOZACAS-NEXT:    and a5, a5, a4
+; RV32IA-NOZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT:  .LBB129_3: # in Loop: Header=BB129_1 Depth=1
+; RV32IA-NOZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT:    bnez a5, .LBB129_1
+; RV32IA-NOZACAS-NEXT:  # %bb.4:
+; RV32IA-NOZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_umin_i16_seq_cst:
 ; RV64I:       # %bb.0:
@@ -20023,6 +26078,31 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-NOZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-ZACAS-LABEL: atomicrmw_umin_i16_seq_cst:
+; RV32IA-ZACAS:       # %bb.0:
+; RV32IA-ZACAS-NEXT:    andi a2, a0, -4
+; RV32IA-ZACAS-NEXT:    slli a0, a0, 3
+; RV32IA-ZACAS-NEXT:    lui a3, 16
+; RV32IA-ZACAS-NEXT:    addi a3, a3, -1
+; RV32IA-ZACAS-NEXT:    sll a4, a3, a0
+; RV32IA-ZACAS-NEXT:    and a1, a1, a3
+; RV32IA-ZACAS-NEXT:    sll a1, a1, a0
+; RV32IA-ZACAS-NEXT:  .LBB129_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT:    lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT:    and a6, a3, a4
+; RV32IA-ZACAS-NEXT:    mv a5, a3
+; RV32IA-ZACAS-NEXT:    bgeu a1, a6, .LBB129_3
+; RV32IA-ZACAS-NEXT:  # %bb.2: # in Loop: Header=BB129_1 Depth=1
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a1
+; RV32IA-ZACAS-NEXT:    and a5, a5, a4
+; RV32IA-ZACAS-NEXT:    xor a5, a3, a5
+; RV32IA-ZACAS-NEXT:  .LBB129_3: # in Loop: Header=BB129_1 Depth=1
+; RV32IA-ZACAS-NEXT:    sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT:    bnez a5, .LBB129_1
+; RV32IA-ZACAS-NEXT:  # %bb.4:
+; RV32IA-ZACAS-NEXT:    srl a0, a3, a0
+; RV32IA-ZACAS-NEXT:    ret
+;
 ; RV64IA-ZACAS-LABEL: atomicrmw_umin_i16_seq_cst:
 ; RV64IA-ZACAS:       # %bb.0:
 ; RV64IA-ZACAS-NEXT:    andi a2, a0, -4
@@ -20048,6 +26128,16 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:    srlw a0, a3, a0
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_seq_cst:
+; RV32IA-WMO-ZABHA:       # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT:    amominu.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i16_seq_cst:
+; RV32IA-TSO-ZABHA:       # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT:    amominu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_seq_cst:
 ; RV64IA-WMO-ZABHA:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NEXT:    amominu.h.aqrl a0, a1, (a0)
@@ -20992,6 +27082,30 @@ define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind {
 ; RV64IA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
 ; RV64IA-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic:
+; RV32IA-WMO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB150_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lr.w a2, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a3, a2, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    bnez a3, .LBB150_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    mv a0, a2
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic:
+; RV32IA-TSO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  .LBB150_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lr.w a2, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a3, a2, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    bnez a3, .LBB150_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    mv a0, a2
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic:
 ; RV64IA-WMO-ZABHA-NOZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB150_1: # =>This Inner Loop Header: Depth=1
@@ -21016,6 +27130,34 @@ define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind {
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    mv a0, a2
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_monotonic:
+; RV32IA-WMO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    lw a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  .LBB150_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a3, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    and a4, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    not a4, a4
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    amocas.w a0, a4, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    bne a0, a3, .LBB150_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_monotonic:
+; RV32IA-TSO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    lw a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  .LBB150_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a3, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    and a4, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    not a4, a4
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    amocas.w a0, a4, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    bne a0, a3, .LBB150_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_monotonic:
 ; RV64IA-WMO-ZABHA-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
@@ -21172,6 +27314,30 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acquire:
+; RV32IA-WMO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB151_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lr.w.aq a2, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a3, a2, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    bnez a3, .LBB151_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    mv a0, a2
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acquire:
+; RV32IA-TSO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  .LBB151_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lr.w a2, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a3, a2, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    bnez a3, .LBB151_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    mv a0, a2
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acquire:
 ; RV64IA-WMO-ZABHA-NOZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB151_1: # =>This Inner Loop Header: Depth=1
@@ -21196,6 +27362,34 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    mv a0, a2
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acquire:
+; RV32IA-WMO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    lw a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  .LBB151_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a3, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    and a4, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    not a4, a4
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    amocas.w.aq a0, a4, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    bne a0, a3, .LBB151_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acquire:
+; RV32IA-TSO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    lw a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  .LBB151_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a3, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    and a4, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    not a4, a4
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    amocas.w a0, a4, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    bne a0, a3, .LBB151_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acquire:
 ; RV64IA-WMO-ZABHA-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
@@ -21352,6 +27546,30 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_release:
+; RV32IA-WMO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB152_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lr.w a2, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a3, a2, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sc.w.rl a3, a3, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    bnez a3, .LBB152_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    mv a0, a2
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_release:
+; RV32IA-TSO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  .LBB152_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lr.w a2, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a3, a2, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    bnez a3, .LBB152_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    mv a0, a2
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_release:
 ; RV64IA-WMO-ZABHA-NOZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB152_1: # =>This Inner Loop Header: Depth=1
@@ -21376,6 +27594,34 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind {
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    mv a0, a2
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_release:
+; RV32IA-WMO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    lw a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  .LBB152_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a3, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    and a4, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    not a4, a4
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    amocas.w.rl a0, a4, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    bne a0, a3, .LBB152_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_release:
+; RV32IA-TSO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    lw a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  .LBB152_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a3, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    and a4, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    not a4, a4
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    amocas.w a0, a4, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    bne a0, a3, .LBB152_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_release:
 ; RV64IA-WMO-ZABHA-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
@@ -21532,6 +27778,30 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV32IA-WMO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB153_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lr.w.aq a2, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a3, a2, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sc.w.rl a3, a3, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    bnez a3, .LBB153_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    mv a0, a2
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV32IA-TSO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  .LBB153_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lr.w a2, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a3, a2, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sc.w a3, a3, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    bnez a3, .LBB153_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    mv a0, a2
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
 ; RV64IA-WMO-ZABHA-NOZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB153_1: # =>This Inner Loop Header: Depth=1
@@ -21556,6 +27826,34 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind {
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    mv a0, a2
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV32IA-WMO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    lw a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  .LBB153_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a3, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    and a4, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    not a4, a4
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    amocas.w.aqrl a0, a4, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    bne a0, a3, .LBB153_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV32IA-TSO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    lw a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  .LBB153_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a3, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    and a4, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    not a4, a4
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    amocas.w a0, a4, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    bne a0, a3, .LBB153_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
 ; RV64IA-WMO-ZABHA-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
@@ -21692,6 +27990,30 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
 ; RV64IA-TSO-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
 ; RV64IA-TSO-ZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
+; RV32IA-WMO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB154_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    lr.w.aqrl a2, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    and a3, a2, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    not a3, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    sc.w.rl a3, a3, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    bnez a3, .LBB154_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    mv a0, a2
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
+; RV32IA-TSO-ZABHA-NOZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  .LBB154_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    lr.w.aqrl a2, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    and a3, a2, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    not a3, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    sc.w.rl a3, a3, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    bnez a3, .LBB154_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:  # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    mv a0, a2
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
 ; RV64IA-WMO-ZABHA-NOZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-NOZACAS-NEXT:  .LBB154_1: # =>This Inner Loop Header: Depth=1
@@ -21716,6 +28038,36 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    mv a0, a2
 ; RV64IA-TSO-ZABHA-NOZACAS-NEXT:    ret
 ;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
+; RV32IA-WMO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    lw a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  .LBB154_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    mv a3, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    and a4, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    not a4, a4
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    fence rw, rw
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    amocas.w.aqrl a0, a4, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    bne a0, a3, .LBB154_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT:    ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
+; RV32IA-TSO-ZABHA-ZACAS:       # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    lw a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  .LBB154_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    mv a3, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    and a4, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    not a4, a4
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    fence rw, rw
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    amocas.w a0, a4, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    bne a0, a3, .LBB154_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:  # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT:    ret
+;
 ; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
 ; RV64IA-WMO-ZABHA-ZACAS:       # %bb.0:
 ; RV64IA-WMO-ZABHA-ZACAS-NEXT:    mv a2, a0
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index ead255b..f3529b1 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -443,7 +443,7 @@
 ; RV32ZVFBFWMA: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zfbfmin1p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvfbfwma1p0_zvl32b1p0"
 ; RV32ZVFOFP8MIN: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfofp8min0p2_zvl32b1p0"
 ; RV32ZACAS: .attribute 5, "rv32i2p1_zaamo1p0_zacas1p0"
-; RV32ZALASR: .attribute 5, "rv32i2p1_zalasr0p1"
+; RV32ZALASR: .attribute 5, "rv32i2p1_zalasr0p9"
 ; RV32ZAMA16B: .attribute 5, "rv32i2p1_zama16b1p0"
 ; RV32ZICFILP: .attribute 5, "rv32i2p1_zicfilp1p0_zicsr2p0"
 ; RV32ZABHA: .attribute 5, "rv32i2p1_zaamo1p0_zabha1p0"
@@ -590,8 +590,8 @@
 ; RV64ZVFBFWMA: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zfbfmin1p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvfbfwma1p0_zvl32b1p0"
 ; RV64ZVFOFP8MIN: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfofp8min0p2_zvl32b1p0"
 ; RV64ZACAS: .attribute 5, "rv64i2p1_zaamo1p0_zacas1p0"
-; RV64ZALASR: .attribute 5, "rv64i2p1_zalasr0p1"
-; RV64ZALASRA: .attribute 5, "rv64i2p1_a2p1_zaamo1p0_zalasr0p1_zalrsc1p0"
+; RV64ZALASR: .attribute 5, "rv64i2p1_zalasr0p9"
+; RV64ZALASRA: .attribute 5, "rv64i2p1_a2p1_zaamo1p0_zalasr0p9_zalrsc1p0"
 ; RV64ZICFILP: .attribute 5, "rv64i2p1_zicfilp1p0_zicsr2p0"
 ; RV64ZABHA: .attribute 5, "rv64i2p1_zaamo1p0_zabha1p0"
 ; RV64ZVBC32E: .attribute 5, "rv64i2p1_zicsr2p0_zvbc32e0p7_zve32x1p0_zvl32b1p0"
diff --git a/llvm/test/CodeGen/RISCV/float-imm.ll b/llvm/test/CodeGen/RISCV/float-imm.ll
index e4e3454..610c72b 100644
--- a/llvm/test/CodeGen/RISCV/float-imm.ll
+++ b/llvm/test/CodeGen/RISCV/float-imm.ll
@@ -4,11 +4,10 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
 ; RUN:   -target-abi=lp64f | FileCheck %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zfinx -verify-machineinstrs < %s \
-; RUN:   -target-abi=ilp32 | FileCheck --check-prefixes=CHECKZFINX,RV32ZFINX %s
+; RUN:   -target-abi=ilp32 | FileCheck --check-prefixes=CHECKZFINX %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zfinx -verify-machineinstrs < %s \
-; RUN:   -target-abi=lp64 | FileCheck --check-prefixes=CHECKZFINX,RV64ZFINX %s
+; RUN:   -target-abi=lp64 | FileCheck --check-prefixes=CHECKZFINX %s
 
-; TODO: constant pool shouldn't be necessary for RV64IF.
 define float @float_imm() nounwind {
 ; CHECK-LABEL: float_imm:
 ; CHECK:       # %bb.0:
@@ -69,6 +68,3 @@ define float @float_negative_zero(ptr %pf) nounwind {
 ; CHECKZFINX-NEXT:    ret
   ret float -0.0
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; RV32ZFINX: {{.*}}
-; RV64ZFINX: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/half-imm.ll b/llvm/test/CodeGen/RISCV/half-imm.ll
index 1dc0da8c..ec1a7a4 100644
--- a/llvm/test/CodeGen/RISCV/half-imm.ll
+++ b/llvm/test/CodeGen/RISCV/half-imm.ll
@@ -5,22 +5,21 @@
 ; RUN:   -target-abi lp64f < %s | FileCheck %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zhinx -verify-machineinstrs \
 ; RUN:   -target-abi ilp32 < %s \
-; RUN:   | FileCheck -check-prefix=RV32IZHINX %s
+; RUN:   | FileCheck -check-prefixes=CHECKIZHINX %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zhinx -verify-machineinstrs \
 ; RUN:   -target-abi lp64 < %s \
-; RUN:   | FileCheck -check-prefix=RV64IZHINX %s
+; RUN:   | FileCheck -check-prefixes=CHECKIZHINX %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \
 ; RUN:   -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECKIZFHMIN %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \
 ; RUN:   -target-abi lp64f < %s | FileCheck -check-prefixes=CHECKIZFHMIN %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zhinxmin -verify-machineinstrs \
 ; RUN:   -target-abi ilp32 < %s \
-; RUN:   | FileCheck -check-prefixes=CHECKIZHINXMIN,RV32IZHINXMIN %s
+; RUN:   | FileCheck -check-prefixes=CHECKIZHINXMIN %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zhinxmin -verify-machineinstrs \
 ; RUN:   -target-abi lp64 < %s \
-; RUN:   | FileCheck -check-prefixes=CHECKIZHINXMIN,RV64IZHINXMIN %s
+; RUN:   | FileCheck -check-prefixes=CHECKIZHINXMIN %s
 
-; TODO: constant pool shouldn't be necessary for RV32IZfh and RV64IZfh
 define half @half_imm() nounwind {
 ; CHECK-LABEL: half_imm:
 ; CHECK:       # %bb.0:
@@ -29,19 +28,12 @@ define half @half_imm() nounwind {
 ; CHECK-NEXT:    fmv.h.x fa0, a0
 ; CHECK-NEXT:    ret
 ;
-; RV32IZHINX-LABEL: half_imm:
-; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a0, 4
-; RV32IZHINX-NEXT:    addi a0, a0, 512
-; RV32IZHINX-NEXT:    # kill: def $x10_h killed $x10_h killed $x10
-; RV32IZHINX-NEXT:    ret
-;
-; RV64IZHINX-LABEL: half_imm:
-; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a0, 4
-; RV64IZHINX-NEXT:    addi a0, a0, 512
-; RV64IZHINX-NEXT:    # kill: def $x10_h killed $x10_h killed $x10
-; RV64IZHINX-NEXT:    ret
+; CHECKIZHINX-LABEL: half_imm:
+; CHECKIZHINX:       # %bb.0:
+; CHECKIZHINX-NEXT:    lui a0, 4
+; CHECKIZHINX-NEXT:    addi a0, a0, 512
+; CHECKIZHINX-NEXT:    # kill: def $x10_h killed $x10_h killed $x10
+; CHECKIZHINX-NEXT:    ret
 ;
 ; CHECKIZFHMIN-LABEL: half_imm:
 ; CHECKIZFHMIN:       # %bb.0:
@@ -68,19 +60,12 @@ define half @half_imm_op(half %a) nounwind {
 ; CHECK-NEXT:    fadd.h fa0, fa0, fa5
 ; CHECK-NEXT:    ret
 ;
-; RV32IZHINX-LABEL: half_imm_op:
-; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    li a1, 15
-; RV32IZHINX-NEXT:    slli a1, a1, 10
-; RV32IZHINX-NEXT:    fadd.h a0, a0, a1
-; RV32IZHINX-NEXT:    ret
-;
-; RV64IZHINX-LABEL: half_imm_op:
-; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    li a1, 15
-; RV64IZHINX-NEXT:    slli a1, a1, 10
-; RV64IZHINX-NEXT:    fadd.h a0, a0, a1
-; RV64IZHINX-NEXT:    ret
+; CHECKIZHINX-LABEL: half_imm_op:
+; CHECKIZHINX:       # %bb.0:
+; CHECKIZHINX-NEXT:    li a1, 15
+; CHECKIZHINX-NEXT:    slli a1, a1, 10
+; CHECKIZHINX-NEXT:    fadd.h a0, a0, a1
+; CHECKIZHINX-NEXT:    ret
 ;
 ; CHECKIZFHMIN-LABEL: half_imm_op:
 ; CHECKIZFHMIN:       # %bb.0:
@@ -108,15 +93,10 @@ define half @half_positive_zero(ptr %pf) nounwind {
 ; CHECK-NEXT:    fmv.h.x fa0, zero
 ; CHECK-NEXT:    ret
 ;
-; RV32IZHINX-LABEL: half_positive_zero:
-; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    li a0, 0
-; RV32IZHINX-NEXT:    ret
-;
-; RV64IZHINX-LABEL: half_positive_zero:
-; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    li a0, 0
-; RV64IZHINX-NEXT:    ret
+; CHECKIZHINX-LABEL: half_positive_zero:
+; CHECKIZHINX:       # %bb.0:
+; CHECKIZHINX-NEXT:    li a0, 0
+; CHECKIZHINX-NEXT:    ret
 ;
 ; CHECKIZFHMIN-LABEL: half_positive_zero:
 ; CHECKIZFHMIN:       # %bb.0:
@@ -137,15 +117,10 @@ define half @half_negative_zero(ptr %pf) nounwind {
 ; CHECK-NEXT:    fmv.h.x fa0, a0
 ; CHECK-NEXT:    ret
 ;
-; RV32IZHINX-LABEL: half_negative_zero:
-; RV32IZHINX:       # %bb.0:
-; RV32IZHINX-NEXT:    lui a0, 1048568
-; RV32IZHINX-NEXT:    ret
-;
-; RV64IZHINX-LABEL: half_negative_zero:
-; RV64IZHINX:       # %bb.0:
-; RV64IZHINX-NEXT:    lui a0, 1048568
-; RV64IZHINX-NEXT:    ret
+; CHECKIZHINX-LABEL: half_negative_zero:
+; CHECKIZHINX:       # %bb.0:
+; CHECKIZHINX-NEXT:    lui a0, 1048568
+; CHECKIZHINX-NEXT:    ret
 ;
 ; CHECKIZFHMIN-LABEL: half_negative_zero:
 ; CHECKIZFHMIN:       # %bb.0:
@@ -159,6 +134,3 @@ define half @half_negative_zero(ptr %pf) nounwind {
 ; CHECKIZHINXMIN-NEXT:    ret
   ret half -0.0
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; RV32IZHINXMIN: {{.*}}
-; RV64IZHINXMIN: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index c028d25..7fd7626 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -409,15 +409,11 @@ define i64 @sh3adduw_2(i64 %0, i64 %1) {
 ;
 ; RV64ZBA-LABEL: sh3adduw_2:
 ; RV64ZBA:       # %bb.0:
-; RV64ZBA-NEXT:    slli a0, a0, 3
-; RV64ZBA-NEXT:    srli a0, a0, 3
 ; RV64ZBA-NEXT:    sh3add.uw a0, a0, a1
 ; RV64ZBA-NEXT:    ret
 ;
 ; RV64XANDESPERF-LABEL: sh3adduw_2:
 ; RV64XANDESPERF:       # %bb.0:
-; RV64XANDESPERF-NEXT:    slli a0, a0, 3
-; RV64XANDESPERF-NEXT:    srli a0, a0, 3
 ; RV64XANDESPERF-NEXT:    nds.lea.d.ze a0, a1, a0
 ; RV64XANDESPERF-NEXT:    ret
   %3 = shl i64 %0, 3
@@ -436,15 +432,11 @@ define i64 @sh3adduw_3(i64 %0, i64 %1) {
 ;
 ; RV64ZBA-LABEL: sh3adduw_3:
 ; RV64ZBA:       # %bb.0:
-; RV64ZBA-NEXT:    slli a0, a0, 3
-; RV64ZBA-NEXT:    srli a0, a0, 3
 ; RV64ZBA-NEXT:    sh3add.uw a0, a0, a1
 ; RV64ZBA-NEXT:    ret
 ;
 ; RV64XANDESPERF-LABEL: sh3adduw_3:
 ; RV64XANDESPERF:       # %bb.0:
-; RV64XANDESPERF-NEXT:    slli a0, a0, 3
-; RV64XANDESPERF-NEXT:    srli a0, a0, 3
 ; RV64XANDESPERF-NEXT:    nds.lea.d.ze a0, a1, a0
 ; RV64XANDESPERF-NEXT:    ret
   %3 = shl i64 %0, 3
@@ -2681,7 +2673,7 @@ define i64 @srliw_3_sh3add(ptr %0, i32 signext %1) {
 ; RV64ZBA-LABEL: srliw_3_sh3add:
 ; RV64ZBA:       # %bb.0:
 ; RV64ZBA-NEXT:    srliw a1, a1, 3
-; RV64ZBA-NEXT:    sh3add.uw a0, a1, a0
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
 ; RV64ZBA-NEXT:    ld a0, 0(a0)
 ; RV64ZBA-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
index 380a4a0..d1f1c46 100644
--- a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
@@ -5,7 +5,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 ; CHECK-LABEL: atomicrmw_uinc_wrap_i8:
 ; CHECK:         .cfi_startproc
 ; CHECK-NEXT:  ! %bb.0:
-; CHECK-NEXT:    membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT:    membar #LoadStore | #StoreStore
 ; CHECK-NEXT:    and %o0, -4, %o2
 ; CHECK-NEXT:    mov 3, %o3
 ; CHECK-NEXT:    andn %o3, %o0, %o0
@@ -36,7 +36,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:  ! %bb.2: ! %atomicrmw.end
 ; CHECK-NEXT:    srl %o4, %o0, %o0
-; CHECK-NEXT:    membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT:    membar #LoadLoad | #LoadStore
 ; CHECK-NEXT:    retl
 ; CHECK-NEXT:    nop
   %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst
@@ -47,7 +47,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
 ; CHECK-LABEL: atomicrmw_uinc_wrap_i16:
 ; CHECK:         .cfi_startproc
 ; CHECK-NEXT:  ! %bb.0:
-; CHECK-NEXT:    membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT:    membar #LoadStore | #StoreStore
 ; CHECK-NEXT:    and %o0, -4, %o2
 ; CHECK-NEXT:    and %o0, 3, %o0
 ; CHECK-NEXT:    xor %o0, 2, %o0
@@ -79,7 +79,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:  ! %bb.2: ! %atomicrmw.end
 ; CHECK-NEXT:    srl %o5, %o0, %o0
-; CHECK-NEXT:    membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT:    membar #LoadLoad | #LoadStore
 ; CHECK-NEXT:    retl
 ; CHECK-NEXT:    nop
   %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst
@@ -90,7 +90,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
 ; CHECK-LABEL: atomicrmw_uinc_wrap_i32:
 ; CHECK:         .cfi_startproc
 ; CHECK-NEXT:  ! %bb.0:
-; CHECK-NEXT:    membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT:    membar #LoadStore | #StoreStore
 ; CHECK-NEXT:    ld [%o0], %o2
 ; CHECK-NEXT:  .LBB2_1: ! %atomicrmw.start
 ; CHECK-NEXT:    ! =>This Inner Loop Header: Depth=1
@@ -106,7 +106,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
 ; CHECK-NEXT:    bne %icc, .LBB2_1
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:  ! %bb.2: ! %atomicrmw.end
-; CHECK-NEXT:    membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT:    membar #LoadLoad | #LoadStore
 ; CHECK-NEXT:    retl
 ; CHECK-NEXT:    mov %o2, %o0
   %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst
@@ -160,7 +160,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
 ; CHECK-LABEL: atomicrmw_udec_wrap_i8:
 ; CHECK:         .cfi_startproc
 ; CHECK-NEXT:  ! %bb.0:
-; CHECK-NEXT:    membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT:    membar #LoadStore | #StoreStore
 ; CHECK-NEXT:    and %o0, -4, %o2
 ; CHECK-NEXT:    mov 3, %o3
 ; CHECK-NEXT:    andn %o3, %o0, %o0
@@ -193,7 +193,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:  ! %bb.2: ! %atomicrmw.end
 ; CHECK-NEXT:    srl %o5, %o0, %o0
-; CHECK-NEXT:    membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT:    membar #LoadLoad | #LoadStore
 ; CHECK-NEXT:    retl
 ; CHECK-NEXT:    nop
   %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst
@@ -204,7 +204,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
 ; CHECK-LABEL: atomicrmw_udec_wrap_i16:
 ; CHECK:         .cfi_startproc
 ; CHECK-NEXT:  ! %bb.0:
-; CHECK-NEXT:    membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT:    membar #LoadStore | #StoreStore
 ; CHECK-NEXT:    and %o0, -4, %o2
 ; CHECK-NEXT:    and %o0, 3, %o0
 ; CHECK-NEXT:    xor %o0, 2, %o0
@@ -238,7 +238,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:  ! %bb.2: ! %atomicrmw.end
 ; CHECK-NEXT:    srl %g2, %o0, %o0
-; CHECK-NEXT:    membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT:    membar #LoadLoad | #LoadStore
 ; CHECK-NEXT:    retl
 ; CHECK-NEXT:    nop
   %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst
@@ -249,7 +249,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
 ; CHECK-LABEL: atomicrmw_udec_wrap_i32:
 ; CHECK:         .cfi_startproc
 ; CHECK-NEXT:  ! %bb.0:
-; CHECK-NEXT:    membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT:    membar #LoadStore | #StoreStore
 ; CHECK-NEXT:    ld [%o0], %o2
 ; CHECK-NEXT:  .LBB6_1: ! %atomicrmw.start
 ; CHECK-NEXT:    ! =>This Inner Loop Header: Depth=1
@@ -267,7 +267,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
 ; CHECK-NEXT:    bne %icc, .LBB6_1
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:  ! %bb.2: ! %atomicrmw.end
-; CHECK-NEXT:    membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT:    membar #LoadLoad | #LoadStore
 ; CHECK-NEXT:    retl
 ; CHECK-NEXT:    mov %o2, %o0
   %result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst
diff --git a/llvm/test/CodeGen/SPARC/atomics-ordering.ll b/llvm/test/CodeGen/SPARC/atomics-ordering.ll
new file mode 100644
index 0000000..7c13ac2
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/atomics-ordering.ll
@@ -0,0 +1,446 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=sparc -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32
+; RUN: llc < %s -mtriple=sparc -mcpu=leon4 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32-LEON4
+; RUN: llc < %s -mtriple=sparc -mcpu=v9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32-V9
+; RUN: llc < %s -mtriple=sparcv9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC64
+
+define i32 @load_acq(ptr %0) nounwind {
+; SPARC32-LABEL: load_acq:
+; SPARC32:       ! %bb.0:
+; SPARC32-NEXT:    save %sp, -96, %sp
+; SPARC32-NEXT:    mov %i0, %o0
+; SPARC32-NEXT:    call __atomic_load_4
+; SPARC32-NEXT:    mov 2, %o1
+; SPARC32-NEXT:    ret
+; SPARC32-NEXT:    restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: load_acq:
+; SPARC32-LEON4:       ! %bb.0:
+; SPARC32-LEON4-NEXT:    retl
+; SPARC32-LEON4-NEXT:    ld [%o0], %o0
+;
+; SPARC32-V9-LABEL: load_acq:
+; SPARC32-V9:       ! %bb.0:
+; SPARC32-V9-NEXT:    ld [%o0], %o0
+; SPARC32-V9-NEXT:    membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT:    retl
+; SPARC32-V9-NEXT:    nop
+;
+; SPARC64-LABEL: load_acq:
+; SPARC64:       ! %bb.0:
+; SPARC64-NEXT:    ld [%o0], %o0
+; SPARC64-NEXT:    membar #LoadLoad | #LoadStore
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    nop
+  %2 = load atomic i32, ptr %0 acquire, align 4
+  ret i32 %2
+}
+
+define i32 @load_sc(ptr %0) nounwind {
+; SPARC32-LABEL: load_sc:
+; SPARC32:       ! %bb.0:
+; SPARC32-NEXT:    save %sp, -96, %sp
+; SPARC32-NEXT:    mov %i0, %o0
+; SPARC32-NEXT:    call __atomic_load_4
+; SPARC32-NEXT:    mov 5, %o1
+; SPARC32-NEXT:    ret
+; SPARC32-NEXT:    restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: load_sc:
+; SPARC32-LEON4:       ! %bb.0:
+; SPARC32-LEON4-NEXT:    retl
+; SPARC32-LEON4-NEXT:    ld [%o0], %o0
+;
+; SPARC32-V9-LABEL: load_sc:
+; SPARC32-V9:       ! %bb.0:
+; SPARC32-V9-NEXT:    ld [%o0], %o0
+; SPARC32-V9-NEXT:    membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT:    retl
+; SPARC32-V9-NEXT:    nop
+;
+; SPARC64-LABEL: load_sc:
+; SPARC64:       ! %bb.0:
+; SPARC64-NEXT:    ld [%o0], %o0
+; SPARC64-NEXT:    membar #LoadLoad | #LoadStore
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    nop
+  %2 = load atomic i32, ptr %0 seq_cst, align 4
+  ret i32 %2
+}
+
+define void @store_rel(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: store_rel:
+; SPARC32:       ! %bb.0:
+; SPARC32-NEXT:    save %sp, -96, %sp
+; SPARC32-NEXT:    mov %i1, %o1
+; SPARC32-NEXT:    mov %i0, %o0
+; SPARC32-NEXT:    call __atomic_store_4
+; SPARC32-NEXT:    mov 3, %o2
+; SPARC32-NEXT:    ret
+; SPARC32-NEXT:    restore
+;
+; SPARC32-LEON4-LABEL: store_rel:
+; SPARC32-LEON4:       ! %bb.0:
+; SPARC32-LEON4-NEXT:    stbar
+; SPARC32-LEON4-NEXT:    retl
+; SPARC32-LEON4-NEXT:    st %o1, [%o0]
+;
+; SPARC32-V9-LABEL: store_rel:
+; SPARC32-V9:       ! %bb.0:
+; SPARC32-V9-NEXT:    membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT:    retl
+; SPARC32-V9-NEXT:    st %o1, [%o0]
+;
+; SPARC64-LABEL: store_rel:
+; SPARC64:       ! %bb.0:
+; SPARC64-NEXT:    membar #LoadStore | #StoreStore
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    st %o1, [%o0]
+  store atomic i32 %1, ptr %0 release, align 4
+  ret void
+}
+
+define void @store_sc(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: store_sc:
+; SPARC32:       ! %bb.0:
+; SPARC32-NEXT:    save %sp, -96, %sp
+; SPARC32-NEXT:    mov %i1, %o1
+; SPARC32-NEXT:    mov %i0, %o0
+; SPARC32-NEXT:    call __atomic_store_4
+; SPARC32-NEXT:    mov 5, %o2
+; SPARC32-NEXT:    ret
+; SPARC32-NEXT:    restore
+;
+; SPARC32-LEON4-LABEL: store_sc:
+; SPARC32-LEON4:       ! %bb.0:
+; SPARC32-LEON4-NEXT:    stbar
+; SPARC32-LEON4-NEXT:    st %o1, [%o0]
+; SPARC32-LEON4-NEXT:    stbar
+; SPARC32-LEON4-NEXT:    ldstub [%sp+-1], %g0
+; SPARC32-LEON4-NEXT:    retl
+; SPARC32-LEON4-NEXT:    nop
+;
+; SPARC32-V9-LABEL: store_sc:
+; SPARC32-V9:       ! %bb.0:
+; SPARC32-V9-NEXT:    membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT:    st %o1, [%o0]
+; SPARC32-V9-NEXT:    membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; SPARC32-V9-NEXT:    retl
+; SPARC32-V9-NEXT:    nop
+;
+; SPARC64-LABEL: store_sc:
+; SPARC64:       ! %bb.0:
+; SPARC64-NEXT:    membar #LoadStore | #StoreStore
+; SPARC64-NEXT:    st %o1, [%o0]
+; SPARC64-NEXT:    membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    nop
+  store atomic i32 %1, ptr %0 seq_cst, align 4
+  ret void
+}
+
+define i32 @rmw_acq(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: rmw_acq:
+; SPARC32:       ! %bb.0:
+; SPARC32-NEXT:    save %sp, -96, %sp
+; SPARC32-NEXT:    mov %i1, %o1
+; SPARC32-NEXT:    mov %i0, %o0
+; SPARC32-NEXT:    call __atomic_exchange_4
+; SPARC32-NEXT:    mov 2, %o2
+; SPARC32-NEXT:    ret
+; SPARC32-NEXT:    restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: rmw_acq:
+; SPARC32-LEON4:       ! %bb.0:
+; SPARC32-LEON4-NEXT:    swap [%o0], %o1
+; SPARC32-LEON4-NEXT:    retl
+; SPARC32-LEON4-NEXT:    mov %o1, %o0
+;
+; SPARC32-V9-LABEL: rmw_acq:
+; SPARC32-V9:       ! %bb.0:
+; SPARC32-V9-NEXT:    swap [%o0], %o1
+; SPARC32-V9-NEXT:    membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT:    retl
+; SPARC32-V9-NEXT:    mov %o1, %o0
+;
+; SPARC64-LABEL: rmw_acq:
+; SPARC64:       ! %bb.0:
+; SPARC64-NEXT:    swap [%o0], %o1
+; SPARC64-NEXT:    membar #LoadLoad | #LoadStore
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    mov %o1, %o0
+  %3 = atomicrmw xchg ptr %0, i32 %1 acquire, align 4
+  ret i32 %3
+}
+
+define i32 @rmw_rel(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: rmw_rel:
+; SPARC32:       ! %bb.0:
+; SPARC32-NEXT:    save %sp, -96, %sp
+; SPARC32-NEXT:    mov %i1, %o1
+; SPARC32-NEXT:    mov %i0, %o0
+; SPARC32-NEXT:    call __atomic_exchange_4
+; SPARC32-NEXT:    mov 3, %o2
+; SPARC32-NEXT:    ret
+; SPARC32-NEXT:    restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: rmw_rel:
+; SPARC32-LEON4:       ! %bb.0:
+; SPARC32-LEON4-NEXT:    stbar
+; SPARC32-LEON4-NEXT:    swap [%o0], %o1
+; SPARC32-LEON4-NEXT:    retl
+; SPARC32-LEON4-NEXT:    mov %o1, %o0
+;
+; SPARC32-V9-LABEL: rmw_rel:
+; SPARC32-V9:       ! %bb.0:
+; SPARC32-V9-NEXT:    membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT:    swap [%o0], %o1
+; SPARC32-V9-NEXT:    retl
+; SPARC32-V9-NEXT:    mov %o1, %o0
+;
+; SPARC64-LABEL: rmw_rel:
+; SPARC64:       ! %bb.0:
+; SPARC64-NEXT:    membar #LoadStore | #StoreStore
+; SPARC64-NEXT:    swap [%o0], %o1
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    mov %o1, %o0
+  %3 = atomicrmw xchg ptr %0, i32 %1 release, align 4
+  ret i32 %3
+}
+
+define i32 @rmw_acq_rel(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: rmw_acq_rel:
+; SPARC32:       ! %bb.0:
+; SPARC32-NEXT:    save %sp, -96, %sp
+; SPARC32-NEXT:    mov %i1, %o1
+; SPARC32-NEXT:    mov %i0, %o0
+; SPARC32-NEXT:    call __atomic_exchange_4
+; SPARC32-NEXT:    mov 4, %o2
+; SPARC32-NEXT:    ret
+; SPARC32-NEXT:    restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: rmw_acq_rel:
+; SPARC32-LEON4:       ! %bb.0:
+; SPARC32-LEON4-NEXT:    stbar
+; SPARC32-LEON4-NEXT:    swap [%o0], %o1
+; SPARC32-LEON4-NEXT:    retl
+; SPARC32-LEON4-NEXT:    mov %o1, %o0
+;
+; SPARC32-V9-LABEL: rmw_acq_rel:
+; SPARC32-V9:       ! %bb.0:
+; SPARC32-V9-NEXT:    membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT:    swap [%o0], %o1
+; SPARC32-V9-NEXT:    membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT:    retl
+; SPARC32-V9-NEXT:    mov %o1, %o0
+;
+; SPARC64-LABEL: rmw_acq_rel:
+; SPARC64:       ! %bb.0:
+; SPARC64-NEXT:    membar #LoadStore | #StoreStore
+; SPARC64-NEXT:    swap [%o0], %o1
+; SPARC64-NEXT:    membar #LoadLoad | #LoadStore
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    mov %o1, %o0
+  %3 = atomicrmw xchg ptr %0, i32 %1 acq_rel, align 4
+  ret i32 %3
+}
+
+define i32 @rmw_sc(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: rmw_sc:
+; SPARC32:       ! %bb.0:
+; SPARC32-NEXT:    save %sp, -96, %sp
+; SPARC32-NEXT:    mov %i1, %o1
+; SPARC32-NEXT:    mov %i0, %o0
+; SPARC32-NEXT:    call __atomic_exchange_4
+; SPARC32-NEXT:    mov 5, %o2
+; SPARC32-NEXT:    ret
+; SPARC32-NEXT:    restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: rmw_sc:
+; SPARC32-LEON4:       ! %bb.0:
+; SPARC32-LEON4-NEXT:    stbar
+; SPARC32-LEON4-NEXT:    swap [%o0], %o1
+; SPARC32-LEON4-NEXT:    retl
+; SPARC32-LEON4-NEXT:    mov %o1, %o0
+;
+; SPARC32-V9-LABEL: rmw_sc:
+; SPARC32-V9:       ! %bb.0:
+; SPARC32-V9-NEXT:    membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT:    swap [%o0], %o1
+; SPARC32-V9-NEXT:    membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT:    retl
+; SPARC32-V9-NEXT:    mov %o1, %o0
+;
+; SPARC64-LABEL: rmw_sc:
+; SPARC64:       ! %bb.0:
+; SPARC64-NEXT:    membar #LoadStore | #StoreStore
+; SPARC64-NEXT:    swap [%o0], %o1
+; SPARC64-NEXT:    membar #LoadLoad | #LoadStore
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    mov %o1, %o0
+  %3 = atomicrmw xchg ptr %0, i32 %1 seq_cst, align 4
+  ret i32 %3
+}
+
+define i32 @cas_acq(ptr %0, i32 %1, i32 %2) nounwind {
+; SPARC32-LABEL: cas_acq:
+; SPARC32:       ! %bb.0:
+; SPARC32-NEXT:    save %sp, -96, %sp
+; SPARC32-NEXT:    mov %i2, %o2
+; SPARC32-NEXT:    mov %i0, %o0
+; SPARC32-NEXT:    st %i1, [%fp+-4]
+; SPARC32-NEXT:    add %fp, -4, %o1
+; SPARC32-NEXT:    mov 2, %o3
+; SPARC32-NEXT:    call __atomic_compare_exchange_4
+; SPARC32-NEXT:    mov %o3, %o4
+; SPARC32-NEXT:    ld [%fp+-4], %i0
+; SPARC32-NEXT:    ret
+; SPARC32-NEXT:    restore
+;
+; SPARC32-LEON4-LABEL: cas_acq:
+; SPARC32-LEON4:       ! %bb.0:
+; SPARC32-LEON4-NEXT:    casa [%o0] 10, %o1, %o2
+; SPARC32-LEON4-NEXT:    retl
+; SPARC32-LEON4-NEXT:    mov %o2, %o0
+;
+; SPARC32-V9-LABEL: cas_acq:
+; SPARC32-V9:       ! %bb.0:
+; SPARC32-V9-NEXT:    cas [%o0], %o1, %o2
+; SPARC32-V9-NEXT:    membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT:    retl
+; SPARC32-V9-NEXT:    mov %o2, %o0
+;
+; SPARC64-LABEL: cas_acq:
+; SPARC64:       ! %bb.0:
+; SPARC64-NEXT:    cas [%o0], %o1, %o2
+; SPARC64-NEXT:    membar #LoadLoad | #LoadStore
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    mov %o2, %o0
+  %4 = cmpxchg ptr %0, i32 %1, i32 %2 acquire acquire, align 4
+  %5 = extractvalue { i32, i1 } %4, 0
+  ret i32 %5
+}
+
+define i32 @cas_rel(ptr %0, i32 %1, i32 %2) nounwind {
+; SPARC32-LABEL: cas_rel:
+; SPARC32:       ! %bb.0:
+; SPARC32-NEXT:    save %sp, -96, %sp
+; SPARC32-NEXT:    mov %i2, %o2
+; SPARC32-NEXT:    mov %i0, %o0
+; SPARC32-NEXT:    st %i1, [%fp+-4]
+; SPARC32-NEXT:    add %fp, -4, %o1
+; SPARC32-NEXT:    mov 3, %o3
+; SPARC32-NEXT:    call __atomic_compare_exchange_4
+; SPARC32-NEXT:    mov %g0, %o4
+; SPARC32-NEXT:    ld [%fp+-4], %i0
+; SPARC32-NEXT:    ret
+; SPARC32-NEXT:    restore
+;
+; SPARC32-LEON4-LABEL: cas_rel:
+; SPARC32-LEON4:       ! %bb.0:
+; SPARC32-LEON4-NEXT:    stbar
+; SPARC32-LEON4-NEXT:    casa [%o0] 10, %o1, %o2
+; SPARC32-LEON4-NEXT:    retl
+; SPARC32-LEON4-NEXT:    mov %o2, %o0
+;
+; SPARC32-V9-LABEL: cas_rel:
+; SPARC32-V9:       ! %bb.0:
+; SPARC32-V9-NEXT:    membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT:    cas [%o0], %o1, %o2
+; SPARC32-V9-NEXT:    retl
+; SPARC32-V9-NEXT:    mov %o2, %o0
+;
+; SPARC64-LABEL: cas_rel:
+; SPARC64:       ! %bb.0:
+; SPARC64-NEXT:    membar #LoadStore | #StoreStore
+; SPARC64-NEXT:    cas [%o0], %o1, %o2
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    mov %o2, %o0
+  %4 = cmpxchg ptr %0, i32 %1, i32 %2 release monotonic, align 4
+  %5 = extractvalue { i32, i1 } %4, 0
+  ret i32 %5
+}
+
+define i32 @cas_acq_rel(ptr %0, i32 %1, i32 %2) nounwind {
+; SPARC32-LABEL: cas_acq_rel:
+; SPARC32:       ! %bb.0:
+; SPARC32-NEXT:    save %sp, -96, %sp
+; SPARC32-NEXT:    mov %i2, %o2
+; SPARC32-NEXT:    mov %i0, %o0
+; SPARC32-NEXT:    st %i1, [%fp+-4]
+; SPARC32-NEXT:    add %fp, -4, %o1
+; SPARC32-NEXT:    mov 4, %o3
+; SPARC32-NEXT:    call __atomic_compare_exchange_4
+; SPARC32-NEXT:    mov 2, %o4
+; SPARC32-NEXT:    ld [%fp+-4], %i0
+; SPARC32-NEXT:    ret
+; SPARC32-NEXT:    restore
+;
+; SPARC32-LEON4-LABEL: cas_acq_rel:
+; SPARC32-LEON4:       ! %bb.0:
+; SPARC32-LEON4-NEXT:    stbar
+; SPARC32-LEON4-NEXT:    casa [%o0] 10, %o1, %o2
+; SPARC32-LEON4-NEXT:    retl
+; SPARC32-LEON4-NEXT:    mov %o2, %o0
+;
+; SPARC32-V9-LABEL: cas_acq_rel:
+; SPARC32-V9:       ! %bb.0:
+; SPARC32-V9-NEXT:    membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT:    cas [%o0], %o1, %o2
+; SPARC32-V9-NEXT:    membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT:    retl
+; SPARC32-V9-NEXT:    mov %o2, %o0
+;
+; SPARC64-LABEL: cas_acq_rel:
+; SPARC64:       ! %bb.0:
+; SPARC64-NEXT:    membar #LoadStore | #StoreStore
+; SPARC64-NEXT:    cas [%o0], %o1, %o2
+; SPARC64-NEXT:    membar #LoadLoad | #LoadStore
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    mov %o2, %o0
+  %4 = cmpxchg ptr %0, i32 %1, i32 %2 acq_rel acquire, align 4
+  %5 = extractvalue { i32, i1 } %4, 0
+  ret i32 %5
+}
+
+define i32 @cas_sc(ptr %0, i32 %1, i32 %2) nounwind {
+; SPARC32-LABEL: cas_sc:
+; SPARC32:       ! %bb.0:
+; SPARC32-NEXT:    save %sp, -96, %sp
+; SPARC32-NEXT:    mov %i2, %o2
+; SPARC32-NEXT:    mov %i0, %o0
+; SPARC32-NEXT:    st %i1, [%fp+-4]
+; SPARC32-NEXT:    add %fp, -4, %o1
+; SPARC32-NEXT:    mov 5, %o3
+; SPARC32-NEXT:    call __atomic_compare_exchange_4
+; SPARC32-NEXT:    mov %o3, %o4
+; SPARC32-NEXT:    ld [%fp+-4], %i0
+; SPARC32-NEXT:    ret
+; SPARC32-NEXT:    restore
+;
+; SPARC32-LEON4-LABEL: cas_sc:
+; SPARC32-LEON4:       ! %bb.0:
+; SPARC32-LEON4-NEXT:    stbar
+; SPARC32-LEON4-NEXT:    casa [%o0] 10, %o1, %o2
+; SPARC32-LEON4-NEXT:    retl
+; SPARC32-LEON4-NEXT:    mov %o2, %o0
+;
+; SPARC32-V9-LABEL: cas_sc:
+; SPARC32-V9:       ! %bb.0:
+; SPARC32-V9-NEXT:    membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT:    cas [%o0], %o1, %o2
+; SPARC32-V9-NEXT:    membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT:    retl
+; SPARC32-V9-NEXT:    mov %o2, %o0
+;
+; SPARC64-LABEL: cas_sc:
+; SPARC64:       ! %bb.0:
+; SPARC64-NEXT:    membar #LoadStore | #StoreStore
+; SPARC64-NEXT:    cas [%o0], %o1, %o2
+; SPARC64-NEXT:    membar #LoadLoad | #LoadStore
+; SPARC64-NEXT:    retl
+; SPARC64-NEXT:    mov %o2, %o0
+  %4 = cmpxchg ptr %0, i32 %1, i32 %2 seq_cst seq_cst, align 4
+  %5 = extractvalue { i32, i1 } %4, 0
+  ret i32 %5
+}
diff --git a/llvm/test/CodeGen/SPIRV/llc-pipeline.ll b/llvm/test/CodeGen/SPIRV/llc-pipeline.ll
new file mode 100644
index 0000000..3fff2a8
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/llc-pipeline.ll
@@ -0,0 +1,214 @@
+; UNSUPPORTED:expensive_checks
+; RUN:llc -O0 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \
+; RUN:   | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-O0 %s
+; RUN:llc -O1 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \
+; RUN:   | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-Opt %s
+; RUN:llc -O2 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \
+; RUN:   | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-Opt %s
+; RUN:llc -O3 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \
+; RUN:   | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-Opt %s
+;
+; REQUIRES:asserts
+
+; SPIRV-O0:Target Library Information
+; SPIRV-O0-NEXT:Target Pass Configuration
+; SPIRV-O0-NEXT:Machine Module Information
+; SPIRV-O0-NEXT:Target Transform Information
+; SPIRV-O0-NEXT:Create Garbage Collector Module Metadata
+; SPIRV-O0-NEXT:Assumption Cache Tracker
+; SPIRV-O0-NEXT:Profile summary info
+; SPIRV-O0-NEXT:Machine Branch Probability Analysis
+; SPIRV-O0-NEXT:  ModulePass Manager
+; SPIRV-O0-NEXT:    Pre-ISel Intrinsic Lowering
+; SPIRV-O0-NEXT:    FunctionPass Manager
+; SPIRV-O0-NEXT:      Expand large div/rem
+; SPIRV-O0-NEXT:      Expand fp
+; SPIRV-O0-NEXT:      Lower Garbage Collection Instructions
+; SPIRV-O0-NEXT:      Shadow Stack GC Lowering
+; SPIRV-O0-NEXT:      Remove unreachable blocks from the CFG
+; SPIRV-O0-NEXT:      Instrument function entry/exit with calls to e.g. mcount() (post inlining)
+; SPIRV-O0-NEXT:      Scalarize Masked Memory Intrinsics
+; SPIRV-O0-NEXT:      Expand reduction intrinsics
+; SPIRV-O0-NEXT:      SPIR-V Regularizer
+; SPIRV-O0-NEXT:    SPIRV prepare functions
+; SPIRV-O0-NEXT:    FunctionPass Manager
+; SPIRV-O0-NEXT:      Lower invoke and unwind, for unwindless code generators
+; SPIRV-O0-NEXT:      Remove unreachable blocks from the CFG
+; SPIRV-O0-NEXT:      SPIRV strip convergent intrinsics
+; SPIRV-O0-NEXT:    SPIRV Legalize Implicit Binding
+; SPIRV-O0-NEXT:    SPIRV CBuffer Access
+; SPIRV-O0-NEXT:    SPIRV emit intrinsics
+; SPIRV-O0-NEXT:    FunctionPass Manager
+; SPIRV-O0-NEXT:      SPIRV legalize bitcast pass
+; SPIRV-O0-NEXT:      Prepare callbr
+; SPIRV-O0-NEXT:      Safe Stack instrumentation pass
+; SPIRV-O0-NEXT:      Insert stack protectors
+; SPIRV-O0-NEXT:      Analysis containing CSE Info
+; SPIRV-O0-NEXT:      IRTranslator
+; SPIRV-O0-NEXT:      Analysis for ComputingKnownBits
+; SPIRV-O0-NEXT:      MachineDominator Tree Construction
+; SPIRV-O0-NEXT:      SPIRVPreLegalizerCombiner
+; SPIRV-O0-NEXT:      SPIRV pre legalizer
+; SPIRV-O0-NEXT:      Analysis containing CSE Info
+; SPIRV-O0-NEXT:      Legalizer
+; SPIRV-O0-NEXT:      SPIRV post legalizer
+; SPIRV-O0-NEXT:      Analysis for ComputingKnownBits
+; SPIRV-O0-NEXT:      Dominator Tree Construction
+; SPIRV-O0-NEXT:      Natural Loop Information
+; SPIRV-O0-NEXT:      Lazy Branch Probability Analysis
+; SPIRV-O0-NEXT:      Lazy Block Frequency Analysis
+; SPIRV-O0-NEXT:      InstructionSelect
+; SPIRV-O0-NEXT:      ResetMachineFunction
+; SPIRV-O0-NEXT:      Finalize ISel and expand pseudo-instructions
+; SPIRV-O0-NEXT:      Local Stack Slot Allocation
+; SPIRV-O0-NEXT:      Remove Redundant DEBUG_VALUE analysis
+; SPIRV-O0-NEXT:      Fixup Statepoint Caller Saved
+; SPIRV-O0-NEXT:      Lazy Machine Block Frequency Analysis
+; SPIRV-O0-NEXT:      Machine Optimization Remark Emitter
+; SPIRV-O0-NEXT:      Prologue/Epilogue Insertion & Frame Finalization
+; SPIRV-O0-NEXT:      Post-RA pseudo instruction expansion pass
+; SPIRV-O0-NEXT:      Analyze Machine Code For Garbage Collection
+; SPIRV-O0-NEXT:      Insert fentry calls
+; SPIRV-O0-NEXT:      Insert XRay ops
+; SPIRV-O0-NEXT:      Machine Sanitizer Binary Metadata
+; SPIRV-O0-NEXT:      Lazy Machine Block Frequency Analysis
+; SPIRV-O0-NEXT:      Machine Optimization Remark Emitter
+; SPIRV-O0-NEXT:      Stack Frame Layout Analysis
+; SPIRV-O0-NEXT:    SPIRV module analysis
+; SPIRV-O0-NEXT:    FunctionPass Manager
+; SPIRV-O0-NEXT:      Lazy Machine Block Frequency Analysis
+; SPIRV-O0-NEXT:      Machine Optimization Remark Emitter
+; SPIRV-O0-NEXT:      SPIRV Assembly Printer
+; SPIRV-O0-NEXT:      Free MachineFunction
+
+; SPIRV-Opt:Target Library Information
+; SPIRV-Opt-NEXT:Target Pass Configuration
+; SPIRV-Opt-NEXT:Machine Module Information
+; SPIRV-Opt-NEXT:Target Transform Information
+; SPIRV-Opt-NEXT:Assumption Cache Tracker
+; SPIRV-Opt-NEXT:Type-Based Alias Analysis
+; SPIRV-Opt-NEXT:Scoped NoAlias Alias Analysis
+; SPIRV-Opt-NEXT:Profile summary info
+; SPIRV-Opt-NEXT:Create Garbage Collector Module Metadata
+; SPIRV-Opt-NEXT:Machine Branch Probability Analysis
+; SPIRV-Opt-NEXT:  ModulePass Manager
+; SPIRV-Opt-NEXT:    Pre-ISel Intrinsic Lowering
+; SPIRV-Opt-NEXT:    FunctionPass Manager
+; SPIRV-Opt-NEXT:      Expand large div/rem
+; SPIRV-Opt-NEXT:      Expand fp
+; SPIRV-Opt-NEXT:      Dominator Tree Construction
+; SPIRV-Opt-NEXT:      Basic Alias Analysis (stateless AA impl)
+; SPIRV-Opt-NEXT:      Natural Loop Information
+; SPIRV-Opt-NEXT:      Canonicalize natural loops
+; SPIRV-Opt-NEXT:      Scalar Evolution Analysis
+; SPIRV-Opt-NEXT:      Loop Pass Manager
+; SPIRV-Opt-NEXT:        Canonicalize Freeze Instructions in Loops
+; SPIRV-Opt-NEXT:        Induction Variable Users
+; SPIRV-Opt-NEXT:        Loop Strength Reduction
+; SPIRV-Opt-NEXT:      Basic Alias Analysis (stateless AA impl)
+; SPIRV-Opt-NEXT:      Function Alias Analysis Results
+; SPIRV-Opt-NEXT:      Merge contiguous icmps into a memcmp
+; SPIRV-Opt-NEXT:      Natural Loop Information
+; SPIRV-Opt-NEXT:      Lazy Branch Probability Analysis
+; SPIRV-Opt-NEXT:      Lazy Block Frequency Analysis
+; SPIRV-Opt-NEXT:      Expand memcmp() to load/stores
+; SPIRV-Opt-NEXT:      Lower Garbage Collection Instructions
+; SPIRV-Opt-NEXT:      Shadow Stack GC Lowering
+; SPIRV-Opt-NEXT:      Remove unreachable blocks from the CFG
+; SPIRV-Opt-NEXT:      Natural Loop Information
+; SPIRV-Opt-NEXT:      Post-Dominator Tree Construction
+; SPIRV-Opt-NEXT:      Branch Probability Analysis
+; SPIRV-Opt-NEXT:      Block Frequency Analysis
+; SPIRV-Opt-NEXT:      Constant Hoisting
+; SPIRV-Opt-NEXT:      Replace intrinsics with calls to vector library
+; SPIRV-Opt-NEXT:      Lazy Branch Probability Analysis
+; SPIRV-Opt-NEXT:      Lazy Block Frequency Analysis
+; SPIRV-Opt-NEXT:      Optimization Remark Emitter
+; SPIRV-Opt-NEXT:      Partially inline calls to library functions
+; SPIRV-Opt-NEXT:      Instrument function entry/exit with calls to e.g. mcount() (post inlining)
+; SPIRV-Opt-NEXT:      Scalarize Masked Memory Intrinsics
+; SPIRV-Opt-NEXT:      Expand reduction intrinsics
+; SPIRV-Opt-NEXT:      SPIR-V Regularizer
+; SPIRV-Opt-NEXT:    SPIRV prepare functions
+; SPIRV-Opt-NEXT:    FunctionPass Manager
+; SPIRV-Opt-NEXT:      Dominator Tree Construction
+; SPIRV-Opt-NEXT:      Natural Loop Information
+; SPIRV-Opt-NEXT:      CodeGen Prepare
+; SPIRV-Opt-NEXT:      Lower invoke and unwind, for unwindless code generators
+; SPIRV-Opt-NEXT:      Remove unreachable blocks from the CFG
+; SPIRV-Opt-NEXT:      SPIRV strip convergent intrinsics
+; SPIRV-Opt-NEXT:    SPIRV Legalize Implicit Binding
+; SPIRV-Opt-NEXT:    SPIRV CBuffer Access
+; SPIRV-Opt-NEXT:    SPIRV emit intrinsics
+; SPIRV-Opt-NEXT:    FunctionPass Manager
+; SPIRV-Opt-NEXT:      SPIRV legalize bitcast pass
+; SPIRV-Opt-NEXT:      Dominator Tree Construction
+; SPIRV-Opt-NEXT:      Basic Alias Analysis (stateless AA impl)
+; SPIRV-Opt-NEXT:      Function Alias Analysis Results
+; SPIRV-Opt-NEXT:      ObjC ARC contraction
+; SPIRV-Opt-NEXT:      Prepare callbr
+; SPIRV-Opt-NEXT:      Safe Stack instrumentation pass
+; SPIRV-Opt-NEXT:      Insert stack protectors
+; SPIRV-Opt-NEXT:      Analysis containing CSE Info
+; SPIRV-Opt-NEXT:      Natural Loop Information
+; SPIRV-Opt-NEXT:      Post-Dominator Tree Construction
+; SPIRV-Opt-NEXT:      Branch Probability Analysis
+; SPIRV-Opt-NEXT:      Basic Alias Analysis (stateless AA impl)
+; SPIRV-Opt-NEXT:      Function Alias Analysis Results
+; SPIRV-Opt-NEXT:      IRTranslator
+; SPIRV-Opt-NEXT:      Analysis for ComputingKnownBits
+; SPIRV-Opt-NEXT:      MachineDominator Tree Construction
+; SPIRV-Opt-NEXT:      SPIRVPreLegalizerCombiner
+; SPIRV-Opt-NEXT:      SPIRV pre legalizer
+; SPIRV-Opt-NEXT:      Analysis containing CSE Info
+; SPIRV-Opt-NEXT:      Legalizer
+; SPIRV-Opt-NEXT:      SPIRV post legalizer
+; SPIRV-Opt-NEXT:      Analysis for ComputingKnownBits
+; SPIRV-Opt-NEXT:      Lazy Branch Probability Analysis
+; SPIRV-Opt-NEXT:      Lazy Block Frequency Analysis
+; SPIRV-Opt-NEXT:      InstructionSelect
+; SPIRV-Opt-NEXT:      ResetMachineFunction
+; SPIRV-Opt-NEXT:      Finalize ISel and expand pseudo-instructions
+; SPIRV-Opt-NEXT:      Lazy Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT:      Early Tail Duplication
+; SPIRV-Opt-NEXT:      Optimize machine instruction PHIs
+; SPIRV-Opt-NEXT:      Slot index numbering
+; SPIRV-Opt-NEXT:      Merge disjoint stack slots
+; SPIRV-Opt-NEXT:      Local Stack Slot Allocation
+; SPIRV-Opt-NEXT:      Remove dead machine instructions
+; SPIRV-Opt-NEXT:      MachineDominator Tree Construction
+; SPIRV-Opt-NEXT:      Machine Natural Loop Construction
+; SPIRV-Opt-NEXT:      Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT:      Early Machine Loop Invariant Code Motion
+; SPIRV-Opt-NEXT:      MachineDominator Tree Construction
+; SPIRV-Opt-NEXT:      Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT:      Machine Common Subexpression Elimination
+; SPIRV-Opt-NEXT:      MachinePostDominator Tree Construction
+; SPIRV-Opt-NEXT:      Machine Cycle Info Analysis
+; SPIRV-Opt-NEXT:      Machine code sinking
+; SPIRV-Opt-NEXT:      Peephole Optimizations
+; SPIRV-Opt-NEXT:      Remove dead machine instructions
+; SPIRV-Opt-NEXT:      Remove Redundant DEBUG_VALUE analysis
+; SPIRV-Opt-NEXT:      Fixup Statepoint Caller Saved
+; SPIRV-Opt-NEXT:      Lazy Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT:      Machine Optimization Remark Emitter
+; SPIRV-Opt-NEXT:      Prologue/Epilogue Insertion & Frame Finalization
+; SPIRV-Opt-NEXT:      Tail Duplication
+; SPIRV-Opt-NEXT:      Post-RA pseudo instruction expansion pass
+; SPIRV-Opt-NEXT:      Analyze Machine Code For Garbage Collection
+; SPIRV-Opt-NEXT:      Insert fentry calls
+; SPIRV-Opt-NEXT:      Insert XRay ops
+; SPIRV-Opt-NEXT:      Machine Sanitizer Binary Metadata
+; SPIRV-Opt-NEXT:      Lazy Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT:      Machine Optimization Remark Emitter
+; SPIRV-Opt-NEXT:      Stack Frame Layout Analysis
+; SPIRV-Opt-NEXT:    SPIRV module analysis
+; SPIRV-Opt-NEXT:    FunctionPass Manager
+; SPIRV-Opt-NEXT:      Lazy Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT:      Machine Optimization Remark Emitter
+; SPIRV-Opt-NEXT:      SPIRV Assembly Printer
+; SPIRV-Opt-NEXT:      Free MachineFunction
+
+define void @empty() {
+  ret void
+}
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir
index 021cb4c..8abe5c5 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir
@@ -8,7 +8,7 @@
 
 --- |
   %struct.DCT_InstanceTypeDef = type { ptr, i32, i32 }
-  
+
   ; Function Attrs: nofree nounwind
   define hidden arm_aapcs_vfpcc void @test(ptr nocapture readonly %S, ptr %pIn, ptr nocapture %pOut) {
   entry:
@@ -41,7 +41,7 @@
     %13 = call i32 @llvm.loop.decrement.reg.i32(i32 %8, i32 1)
     %14 = icmp ne i32 %13, 0
     br i1 %14, label %do.body, label %do.end
-  
+
   do.end:                                           ; preds = %do.body
     %15 = extractelement <4 x float> %11, i32 0
     %16 = extractelement <4 x float> %11, i32 1
@@ -56,7 +56,7 @@
     %sub4 = add i32 %1, -4
     %cmp5201 = icmp ugt i32 %sub4, 1
     br i1 %cmp5201, label %for.body.lr.ph, label %for.cond54.preheader
-  
+
   for.body.lr.ph:                                   ; preds = %do.end
     %scevgep = getelementptr float, ptr %pIn, i32 4
     %20 = add i32 %0, 4
@@ -161,7 +161,7 @@
     %63 = call i32 @llvm.loop.decrement.reg.i32(i32 %53, i32 1)
     %64 = icmp ne i32 %63, 0
     br i1 %64, label %do.body24, label %do.end33
-  
+
   do.end33:                                         ; preds = %do.body24
     %65 = bitcast ptr %lsr.iv27 to ptr
     %66 = bitcast ptr %lsr.iv20 to ptr
@@ -254,7 +254,7 @@
     %inc = add nuw i32 %k.1200, 1
     %exitcond.not = icmp eq i32 %inc, %1
     br i1 %exitcond.not, label %for.end72, label %for.body56
-  
+
   for.end72:                                        ; preds = %do.end66, %for.cond54.preheader
     ret void
   }
@@ -428,28 +428,28 @@ body:             |
     renamable $lr = t2LoopDec killed renamable $lr, 1
     t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
     tB %bb.2, 14 /* CC::al */, $noreg
-  
+
   bb.2.do.end:
     successors: %bb.3(0x40000000), %bb.7(0x40000000)
     liveins: $q0, $r2, $r3, $r4, $r5, $r11
-  
-    renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
+
+    renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     renamable $r0, dead $cpsr = tSUBi3 renamable $r3, 4, 14 /* CC::al */, $noreg
     tSTRspi killed renamable $r3, $sp, 1, 14 /* CC::al */, $noreg :: (store (s32) into %stack.8)
-    renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg
+    renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     tSTRspi renamable $r0, $sp, 8, 14 /* CC::al */, $noreg :: (store (s32) into %stack.1)
-    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
+    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0, implicit $fpscr_rm
     renamable $s2 = VLDRS renamable $r11, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.2)
     tCMPi8 killed renamable $r0, 2, 14 /* CC::al */, $noreg, implicit-def $cpsr
     renamable $r0 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
-    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VMULS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg
+    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VMULS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     VSTRS killed renamable $s0, renamable $r2, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.pOut)
     t2Bcc %bb.7, 3 /* CC::lo */, killed $cpsr
-  
+
   bb.3.for.body.lr.ph:
     successors: %bb.4(0x80000000)
     liveins: $r0, $r2, $r4, $r5, $r11
-  
+
     renamable $r6 = t2ADDri renamable $r5, 16, 14 /* CC::al */, $noreg, $noreg
     renamable $r1, dead $cpsr = tSUBi3 renamable $r4, 4, 14 /* CC::al */, $noreg
     tSTRspi killed renamable $r6, $sp, 4, 14 /* CC::al */, $noreg :: (store (s32) into %stack.5)
@@ -523,26 +523,26 @@ body:             |
     renamable $lr = t2LoopDec killed renamable $lr, 1
     t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr
     tB %bb.6, 14 /* CC::al */, $noreg
-  
+
   bb.6.do.end33:
     successors: %bb.4(0x7c000000), %bb.7(0x04000000)
     liveins: $q0, $q1, $q2, $q3, $r0, $r1, $r2, $r6, $r8, $r9, $r10, $r12
-  
-    renamable $s16 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s12, renamable $s13, 14 /* CC::al */, $noreg
-    renamable $s18 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s8, renamable $s9, 14 /* CC::al */, $noreg
-    renamable $s16 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s16, renamable $s14, 14 /* CC::al */, $noreg
-    renamable $s18 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, renamable $s10, 14 /* CC::al */, $noreg
-    renamable $s12 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s16, killed renamable $s15, 14 /* CC::al */, $noreg, implicit $q3
-    renamable $s8 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, killed renamable $s11, 14 /* CC::al */, $noreg, implicit $q2
-    renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s4, renamable $s5, 14 /* CC::al */, $noreg
-    renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
+
+    renamable $s16 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s12, renamable $s13, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    renamable $s18 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s8, renamable $s9, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    renamable $s16 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s16, renamable $s14, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    renamable $s18 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, renamable $s10, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    renamable $s12 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s16, killed renamable $s15, 14 /* CC::al */, $noreg, implicit $q3, implicit $fpscr_rm
+    renamable $s8 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, killed renamable $s11, 14 /* CC::al */, $noreg, implicit $q2, implicit $fpscr_rm
+    renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s4, renamable $s5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     renamable $r7 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0)
-    renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, renamable $s6, 14 /* CC::al */, $noreg
-    renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, renamable $s2, 14 /* CC::al */, $noreg
+    renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, renamable $s6, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, renamable $s2, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     renamable $r3 = t2ADDrs renamable $r2, renamable $r0, 18, 14 /* CC::al */, $noreg, $noreg
     renamable $r7 = t2ADDrs renamable $r2, killed renamable $r7, 18, 14 /* CC::al */, $noreg, $noreg
-    renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, killed renamable $s7, 14 /* CC::al */, $noreg, implicit $q1
-    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
+    renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, killed renamable $s7, 14 /* CC::al */, $noreg, implicit $q1, implicit $fpscr_rm
+    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0, implicit $fpscr_rm
     VSTRS killed renamable $s12, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx37)
     VSTRS killed renamable $s8, killed renamable $r7, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx42)
     renamable $r3 = t2ADDrs renamable $r2, killed renamable $r8, 18, 14 /* CC::al */, $noreg, $noreg
@@ -597,7 +597,7 @@ body:             |
   bb.13:
     successors: %bb.10(0x80000000)
     liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r11, $r12
-  
+
   bb.10.do.body59 (align 4):
     successors: %bb.10(0x7c000000), %bb.11(0x04000000)
     liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r11, $r12
@@ -611,20 +611,20 @@ body:             |
     renamable $lr = t2LoopDec killed renamable $lr, 1
     t2LoopEnd renamable $lr, %bb.10, implicit-def dead $cpsr
     tB %bb.11, 14 /* CC::al */, $noreg
-  
+
   bb.11.do.end66:
     successors: %bb.12(0x04000000), %bb.9(0x7c000000)
     liveins: $q0, $r0, $r2, $r3, $r4, $r5, $r11, $r12
-  
-    renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
+
+    renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     renamable $r1 = t2ADDrs renamable $r2, renamable $r0, 18, 14 /* CC::al */, $noreg, $noreg
-    renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg
+    renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     renamable $r0, dead $cpsr = nuw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg
-    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
+    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0, implicit $fpscr_rm
     tCMPhir renamable $r0, renamable $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
     VSTRS killed renamable $s0, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx70)
     tBcc %bb.9, 1 /* CC::ne */, killed $cpsr
-  
+
   bb.12.for.end72:
     $sp = frame-destroy tADDspi $sp, 10, 14 /* CC::al */, $noreg
     $sp = frame-destroy VLDMDIA_UPD $sp, 14 /* CC::al */, $noreg, def $d8, def $d9, def $d10, def $d11
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
index 31e88ea..85b826a 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
@@ -185,15 +185,15 @@ body:             |
     successors: %bb.5(0x80000000)
     liveins: $q0, $r0, $r1, $r2, $r4
 
-    renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14, $noreg
+    renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14, $noreg, implicit $fpscr_rm
     $lr = tMOVr $r4, 14, $noreg
     $r3 = tMOVr $r1, 14, $noreg
-    renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14, $noreg
-    renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit $q0
+    renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14, $noreg, implicit $fpscr_rm
+    renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit $q0, implicit $fpscr_rm
     $s2 = VMOVSR $r1, 14, $noreg
     renamable $s2 = VUITOS killed renamable $s2, 14, $noreg
     $lr = t2DoLoopStart killed $r4
-    renamable $s4 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg
+    renamable $s4 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg, implicit $fpscr_rm
     renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, $noreg, undef renamable $q0
 
   bb.5:
@@ -215,13 +215,13 @@ body:             |
   bb.6:
     liveins: $q0, $r1, $r2
 
-    renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14, $noreg
+    renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14, $noreg, implicit $fpscr_rm
     renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 1, 14, $noreg
-    renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14, $noreg
-    renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit $q0
+    renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14, $noreg, implicit $fpscr_rm
+    renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit $q0, implicit $fpscr_rm
     $s2 = VMOVSR killed $r0, 14, $noreg
     renamable $s2 = VUITOS killed renamable $s2, 14, $noreg
-    renamable $s0 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg
+    renamable $s0 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg, implicit $fpscr_rm
     VSTRS killed renamable $s0, killed renamable $r2, 0, 14, $noreg
     tPOP_RET 14, $noreg, def $r4, def $pc
 
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
index f5da7ac..780831c 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
@@ -232,9 +232,9 @@ body:             |
   bb.3.middle.block:
     liveins: $q1
 
-    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg
-    renamable $s2 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s5, 14 /* CC::al */, $noreg, implicit $q1
-    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg
+    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    renamable $s2 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s5, 14 /* CC::al */, $noreg, implicit $q1, implicit $fpscr_rm
+    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr
     tBX_RET 14 /* CC::al */, $noreg, implicit killed $s0
 
@@ -376,9 +376,9 @@ body:             |
   bb.3.middle.block:
     liveins: $q1
 
-    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg
-    renamable $s2 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s5, 14 /* CC::al */, $noreg, implicit $q1
-    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg
+    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    renamable $s2 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s5, 14 /* CC::al */, $noreg, implicit $q1, implicit $fpscr_rm
+    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr
     tBX_RET 14 /* CC::al */, $noreg, implicit killed $s0
 
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
index c331612..5dcd0a1 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
@@ -240,10 +240,10 @@ body:             |
 
     $s4 = VMOVSR $r1, 14 /* CC::al */, $noreg
     $lr = tMOVr $r4, 14 /* CC::al */, $noreg
-    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
+    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0, implicit $fpscr_rm
     $lr = t2DoLoopStart killed $r4
     renamable $s4 = VUITOS killed renamable $s4, 14 /* CC::al */, $noreg
-    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg
+    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg
     renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, $noreg, undef renamable $q0
     renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q1
@@ -267,10 +267,10 @@ body:             |
     liveins: $q0, $r1, $r2
 
     renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 1, 14 /* CC::al */, $noreg
-    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
+    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0, implicit $fpscr_rm
     $s2 = VMOVSR killed $r0, 14 /* CC::al */, $noreg
     renamable $s2 = VUITOS killed renamable $s2, 14 /* CC::al */, $noreg
-    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg
+    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.pResult)
     frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
 
diff --git a/llvm/test/CodeGen/Thumb2/pipeliner-inlineasm.mir b/llvm/test/CodeGen/Thumb2/pipeliner-inlineasm.mir
index 5221205..d9d2f25 100644
--- a/llvm/test/CodeGen/Thumb2/pipeliner-inlineasm.mir
+++ b/llvm/test/CodeGen/Thumb2/pipeliner-inlineasm.mir
@@ -96,7 +96,7 @@ body:             |
   ; CHECK-NEXT: bb.6.for.body:
   ; CHECK-NEXT:   successors: %bb.7(0x80000000), %bb.8(0x00000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], %30, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], %30, 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   [[t2ADDri2:%[0-9]+]]:rgpr = t2ADDri [[COPY7]], 4, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NEXT:   [[VLDRS4:%[0-9]+]]:spr = VLDRS [[COPY7]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
   ; CHECK-NEXT:   [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[COPY6]], 4, 14 /* CC::al */, $noreg, $noreg
@@ -119,13 +119,13 @@ body:             |
   ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:spr = PHI [[VLDRS5]], %bb.6, %47, %bb.7
   ; CHECK-NEXT:   [[PHI5:%[0-9]+]]:spr = PHI %40, %bb.6, %55, %bb.7
   ; CHECK-NEXT:   [[PHI6:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, %45, %bb.7
-  ; CHECK-NEXT:   [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[PHI4]], [[PHI5]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[PHI4]], [[PHI5]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
   ; CHECK-NEXT:   [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
   ; CHECK-NEXT:   [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
   ; CHECK-NEXT:   [[t2ADDri4:%[0-9]+]]:rgpr = t2ADDri [[PHI]], 4, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NEXT:   [[t2ADDri5:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI6]], [[PHI3]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI6]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri4]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri5]]
   ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
@@ -140,7 +140,7 @@ body:             |
   ; CHECK-NEXT:   [[PHI8:%[0-9]+]]:spr = PHI [[VLDRS5]], %bb.6, [[VLDRS6]], %bb.7
   ; CHECK-NEXT:   [[PHI9:%[0-9]+]]:spr = PHI %40, %bb.6, %55, %bb.7
   ; CHECK-NEXT:   [[PHI10:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[VMULS1]], %bb.7
-  ; CHECK-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI7]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI7]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.9:
   ; CHECK-NEXT:   successors: %bb.4(0x80000000)
@@ -148,8 +148,8 @@ body:             |
   ; CHECK-NEXT:   [[PHI11:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
   ; CHECK-NEXT:   [[PHI12:%[0-9]+]]:spr = PHI [[VLDRS3]], %bb.5, [[PHI8]], %bb.8
   ; CHECK-NEXT:   [[PHI13:%[0-9]+]]:spr = PHI %30, %bb.5, [[PHI9]], %bb.8
-  ; CHECK-NEXT:   [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[PHI12]], [[PHI13]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS2]], [[PHI11]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[PHI12]], [[PHI13]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+  ; CHECK-NEXT:   [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS2]], [[PHI11]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   t2B %bb.4, 14 /* CC::al */, $noreg
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.for.end:
@@ -194,8 +194,8 @@ body:             |
     %20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
     %21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
     INLINEASM &nop, 0 /* attdialect */, 196618 /* regdef:SPR */, def %25, 2147483657 /* reguse tiedto:$0 */, %19(tied-def 3)
-    %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %25, 14 /* CC::al */, $noreg
-    %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+    %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %25, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     %23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
     %7:gpr = COPY %23
     %8:gpr = COPY %20
diff --git a/llvm/test/CodeGen/Thumb2/scavenge-lr.mir b/llvm/test/CodeGen/Thumb2/scavenge-lr.mir
index 5513bed..bfe55a5 100644
--- a/llvm/test/CodeGen/Thumb2/scavenge-lr.mir
+++ b/llvm/test/CodeGen/Thumb2/scavenge-lr.mir
@@ -147,10 +147,10 @@ body:             |
     $q5 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.zzz..sroa_cast241, align 32)
     $q1 = VMLAfq killed $q1, $q5, killed $q8, 14 /* CC::al */, $noreg
     $s8 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
-    $s3 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q0
-    $s2 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0
-    $s1 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0
-    $s0 = VDIVS $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q0, implicit-def $q0
+    $s3 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q0, implicit $fpscr_rm
+    $s2 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0, implicit $fpscr_rm
+    $s1 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0, implicit $fpscr_rm
+    $s0 = VDIVS $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q0, implicit-def $q0, implicit $fpscr_rm
     $r7 = t2SUBri $r0, 64, 14 /* CC::al */, $noreg, $noreg
     $q8 = VLD1q64 $r7, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.yyy..sroa_cast244, align 32)
     VSTMQIA $q8, %stack.1, 14 /* CC::al */, $noreg :: (store (s128) into %stack.1)
@@ -185,10 +185,10 @@ body:             |
     $r3 = VST1q32wb_fixed killed $r3, 16, killed $q10, 14 /* CC::al */, $noreg :: (store (s128) into %ir.zzz..sroa_cast241, align 32)
     $q10 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.zzz..sroa_cast241 + 16, basealign 32)
     $q1 = VMLAfq killed $q1, $q10, killed $q8, 14 /* CC::al */, $noreg
-    $s23 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q5
-    $s22 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5
-    $s21 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5
-    $s20 = VDIVS killed $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q5, implicit-def $q5
+    $s23 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q5, implicit $fpscr_rm
+    $s22 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5, implicit $fpscr_rm
+    $s21 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5, implicit $fpscr_rm
+    $s20 = VDIVS killed $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q5, implicit-def $q5, implicit $fpscr_rm
     VST1q64 killed $r5, 16, $q5, 14 /* CC::al */, $noreg :: (store (s128) into %ir.xxx..sroa_cast248 + 16, basealign 32)
     VST1q64 killed $r6, 16, $q5, 14 /* CC::al */, $noreg :: (store (s128) into %ir.vvv..sroa_cast230 + 16, basealign 32)
     $q8 = VLDMQIA %stack.0, 14 /* CC::al */, $noreg :: (load (s128) from %stack.0)
diff --git a/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir b/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir
index ba10045..20f044a 100644
--- a/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir
+++ b/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir
@@ -83,7 +83,7 @@ body:             |
   ; CHECK-NEXT:   [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
   ; CHECK-NEXT:   [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NEXT:   [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
-  ; CHECK-NEXT:   [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[COPY]], 1, 14 /* CC::al */, $noreg, def $cpsr
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:gprnopc = COPY [[t2SUBri2]]
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:gprnopc = COPY [[t2ADDri1]]
@@ -98,7 +98,7 @@ body:             |
   ; CHECK-NEXT:   [[VLDRS4:%[0-9]+]]:spr = VLDRS [[COPY7]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
   ; CHECK-NEXT:   [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[COPY6]], 4, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NEXT:   [[VLDRS5:%[0-9]+]]:spr = VLDRS [[COPY6]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
-  ; CHECK-NEXT:   [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS5]], [[VLDRS4]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS5]], [[VLDRS4]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   [[t2SUBri3:%[0-9]+]]:rgpr = t2SUBri [[COPY5]], 1, 14 /* CC::al */, $noreg, def $cpsr
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
@@ -115,7 +115,7 @@ body:             |
   ; CHECK-NEXT:   [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, %43, %bb.7
   ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, %52, %bb.7
   ; CHECK-NEXT:   [[PHI5:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
-  ; CHECK-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
   ; CHECK-NEXT:   [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
   ; CHECK-NEXT:   [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
@@ -124,7 +124,7 @@ body:             |
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri4]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri5]]
   ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
-  ; CHECK-NEXT:   [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS6]], [[VLDRS7]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS6]], [[VLDRS7]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   t2Bcc %bb.8, 0 /* CC::eq */, $cpsr
   ; CHECK-NEXT:   t2B %bb.7, 14 /* CC::al */, $noreg
   ; CHECK-NEXT: {{  $}}
@@ -134,14 +134,14 @@ body:             |
   ; CHECK-NEXT:   [[PHI6:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, [[VADDS]], %bb.7
   ; CHECK-NEXT:   [[PHI7:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, [[VMULS2]], %bb.7
   ; CHECK-NEXT:   [[PHI8:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
-  ; CHECK-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI8]], [[PHI6]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI8]], [[PHI6]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.9:
   ; CHECK-NEXT:   successors: %bb.4(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[PHI9:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
   ; CHECK-NEXT:   [[PHI10:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, [[PHI7]], %bb.8
-  ; CHECK-NEXT:   [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI9]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI9]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   t2B %bb.4, 14 /* CC::al */, $noreg
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.for.end:
@@ -185,8 +185,8 @@ body:             |
     %19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
     %20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
     %21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
-    %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
-    %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+    %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     %23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
     %7:gpr = COPY %23
     %8:gpr = COPY %20
diff --git a/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir b/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir
index 854c5b8..177c94e 100644
--- a/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir
+++ b/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir
@@ -84,7 +84,7 @@ body:             |
   ; CHECK-NEXT:   [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
   ; CHECK-NEXT:   [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NEXT:   [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
-  ; CHECK-NEXT:   [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:gprlr = COPY [[t2DoLoopStart]]
   ; CHECK-NEXT:   [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[COPY5]], 1
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:gpr = COPY [[t2LoopDec]]
@@ -110,8 +110,8 @@ body:             |
   ; CHECK-NEXT:   [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:gpr = COPY [[t2ADDri2]]
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
-  ; CHECK-NEXT:   [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS4]], [[VLDRS5]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI4]], [[PHI3]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS4]], [[VLDRS5]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+  ; CHECK-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI4]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:gpr = COPY [[t2LoopDec1]]
   ; CHECK-NEXT:   t2LoopEnd [[t2LoopDec1]], %bb.6, implicit-def $cpsr
   ; CHECK-NEXT:   t2B %bb.7, 14 /* CC::al */, $noreg
@@ -121,7 +121,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[PHI5:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS]], %bb.6
   ; CHECK-NEXT:   [[PHI6:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, [[VMULS1]], %bb.6
-  ; CHECK-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI6]], [[PHI5]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI6]], [[PHI5]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   t2B %bb.4, 14 /* CC::al */, $noreg
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.for.end:
@@ -166,8 +166,8 @@ body:             |
     %19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
     %20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
     %21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
-    %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
-    %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+    %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     %42:gprlr = COPY %4
     %23:gprlr = t2LoopDec %42:gprlr, 1
     %7:gpr = COPY %23
diff --git a/llvm/test/CodeGen/Thumb2/swp-fixedii.mir b/llvm/test/CodeGen/Thumb2/swp-fixedii.mir
index dd02703..7939717 100644
--- a/llvm/test/CodeGen/Thumb2/swp-fixedii.mir
+++ b/llvm/test/CodeGen/Thumb2/swp-fixedii.mir
@@ -83,7 +83,7 @@ body:             |
   ; CHECK-NEXT:   [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
   ; CHECK-NEXT:   [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NEXT:   [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
-  ; CHECK-NEXT:   [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[COPY]], 1, 14 /* CC::al */, $noreg, def $cpsr
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:gprnopc = COPY [[t2SUBri2]]
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:gprnopc = COPY [[t2ADDri1]]
@@ -98,7 +98,7 @@ body:             |
   ; CHECK-NEXT:   [[VLDRS4:%[0-9]+]]:spr = VLDRS [[COPY7]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
   ; CHECK-NEXT:   [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[COPY6]], 4, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NEXT:   [[VLDRS5:%[0-9]+]]:spr = VLDRS [[COPY6]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
-  ; CHECK-NEXT:   [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS5]], [[VLDRS4]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS5]], [[VLDRS4]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   [[t2SUBri3:%[0-9]+]]:rgpr = t2SUBri [[COPY5]], 1, 14 /* CC::al */, $noreg, def $cpsr
   ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
   ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
@@ -115,7 +115,7 @@ body:             |
   ; CHECK-NEXT:   [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, %43, %bb.7
   ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, %52, %bb.7
   ; CHECK-NEXT:   [[PHI5:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
-  ; CHECK-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
   ; CHECK-NEXT:   [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
   ; CHECK-NEXT:   [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
@@ -124,7 +124,7 @@ body:             |
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri4]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri5]]
   ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
-  ; CHECK-NEXT:   [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS6]], [[VLDRS7]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS6]], [[VLDRS7]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   t2Bcc %bb.7, 1 /* CC::ne */, $cpsr
   ; CHECK-NEXT:   t2B %bb.8, 14 /* CC::al */, $noreg
   ; CHECK-NEXT: {{  $}}
@@ -134,14 +134,14 @@ body:             |
   ; CHECK-NEXT:   [[PHI6:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, [[VADDS]], %bb.7
   ; CHECK-NEXT:   [[PHI7:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, [[VMULS2]], %bb.7
   ; CHECK-NEXT:   [[PHI8:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
-  ; CHECK-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI8]], [[PHI6]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI8]], [[PHI6]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.9:
   ; CHECK-NEXT:   successors: %bb.4(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[PHI9:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
   ; CHECK-NEXT:   [[PHI10:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, [[PHI7]], %bb.8
-  ; CHECK-NEXT:   [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI9]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI9]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   t2B %bb.4, 14 /* CC::al */, $noreg
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.for.end:
@@ -185,8 +185,8 @@ body:             |
     %19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
     %20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
     %21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
-    %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
-    %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+    %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     %23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
     %7:gpr = COPY %23
     %8:gpr = COPY %20
diff --git a/llvm/test/CodeGen/Thumb2/swp-regpressure.mir b/llvm/test/CodeGen/Thumb2/swp-regpressure.mir
index 2bcb0c9..955b53df 100644
--- a/llvm/test/CodeGen/Thumb2/swp-regpressure.mir
+++ b/llvm/test/CodeGen/Thumb2/swp-regpressure.mir
@@ -148,8 +148,8 @@ body:             |
   ; CHECK-NEXT:   [[VLDRS2:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
   ; CHECK-NEXT:   [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NEXT:   [[VLDRS3:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
-  ; CHECK-NEXT:   [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS]], [[PHI3]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+  ; CHECK-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:gpr = COPY [[t2SUBri2]]
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:gpr = COPY [[t2ADDri1]]
@@ -236,8 +236,8 @@ body:             |
     %19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
     %20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
     %21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
-    %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
-    %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+    %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     %23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
     %7:gpr = COPY %23
     %8:gpr = COPY %20
@@ -314,24 +314,24 @@ body:             |
   ; CHECK-NEXT:   [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[COPY]], 1, 14 /* CC::al */, $noreg, def $cpsr
   ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:gprnopc = COPY [[t2SUBri2]]
   ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %66:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %67:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %68:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %69:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %70:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %71:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %72:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %73:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %74:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %75:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %76:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %77:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %78:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %79:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %80:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %81:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %82:rgpr = COPY [[COPY4]]
-  ; CHECK-NEXT:   dead %83:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY7:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY8:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY9:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY10:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY11:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY12:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY13:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY14:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY15:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY16:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY17:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY18:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY19:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY20:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY21:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY22:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY23:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead [[COPY24:%[0-9]+]]:rgpr = COPY [[COPY4]]
   ; CHECK-NEXT:   t2Bcc %bb.9, 0 /* CC::eq */, $cpsr
   ; CHECK-NEXT:   t2B %bb.6, 14 /* CC::al */, $noreg
   ; CHECK-NEXT: {{  $}}
@@ -342,82 +342,82 @@ body:             |
   ; CHECK-NEXT:   [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
   ; CHECK-NEXT:   [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NEXT:   [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
-  ; CHECK-NEXT:   [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:gpr = COPY [[t2ADDri1]]
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:gpr = COPY [[t2ADDri]]
+  ; CHECK-NEXT:   [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:gpr = COPY [[t2ADDri1]]
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:gpr = COPY [[t2ADDri]]
   ; CHECK-NEXT:   [[t2SUBri3:%[0-9]+]]:rgpr = t2SUBri [[COPY5]], 1, 14 /* CC::al */, $noreg, def $cpsr
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %94:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %95:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %96:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %97:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %98:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %99:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %100:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %101:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %102:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %103:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %104:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %105:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %106:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %107:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %108:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %109:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %110:rgpr = COPY [[COPY6]]
-  ; CHECK-NEXT:   dead %111:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY27:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
+  ; CHECK-NEXT:   [[COPY28:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY29:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY30:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY31:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY32:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY33:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY34:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY35:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY36:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY37:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY38:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY39:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY40:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY41:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY42:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY43:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY44:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY45:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead [[COPY46:%[0-9]+]]:rgpr = COPY [[COPY6]]
   ; CHECK-NEXT:   t2Bcc %bb.8, 0 /* CC::eq */, $cpsr
   ; CHECK-NEXT:   t2B %bb.7, 14 /* CC::al */, $noreg
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.7.for.body:
   ; CHECK-NEXT:   successors: %bb.8(0x04000000), %bb.7(0x7c000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:gprnopc = PHI [[COPY8]], %bb.6, %116, %bb.7
-  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:gprnopc = PHI [[COPY7]], %bb.6, %117, %bb.7
-  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:gprnopc = PHI [[COPY9]], %bb.6, %140, %bb.7
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:gprnopc = PHI [[COPY26]], %bb.6, %116, %bb.7
+  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:gprnopc = PHI [[COPY25]], %bb.6, %117, %bb.7
+  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:gprnopc = PHI [[COPY27]], %bb.6, %140, %bb.7
   ; CHECK-NEXT:   [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, %137, %bb.7
-  ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:gprnopc = PHI [[COPY10]], %bb.6, %139, %bb.7
+  ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:gprnopc = PHI [[COPY28]], %bb.6, %139, %bb.7
   ; CHECK-NEXT:   [[PHI5:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, %118, %bb.7
   ; CHECK-NEXT:   [[VLDRS4:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
   ; CHECK-NEXT:   [[VLDRS5:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
   ; CHECK-NEXT:   [[t2ADDri2:%[0-9]+]]:rgpr = t2ADDri [[PHI]], 4, 14 /* CC::al */, $noreg, $noreg
   ; CHECK-NEXT:   [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri2]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
-  ; CHECK-NEXT:   [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS4]], [[VLDRS5]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   dead %119:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %120:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %121:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %122:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %123:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %124:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %125:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %126:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %127:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %128:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %129:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %130:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %131:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %132:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %133:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %134:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %135:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   dead %136:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[COPY47:%[0-9]+]]:gpr = COPY [[t2ADDri2]]
+  ; CHECK-NEXT:   [[COPY48:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
+  ; CHECK-NEXT:   [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS4]], [[VLDRS5]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+  ; CHECK-NEXT:   dead [[COPY49:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY50:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY51:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY52:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY53:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY54:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY55:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY56:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY57:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY58:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY59:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY60:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY61:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY62:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY63:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY64:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY65:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead [[COPY66:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:rgpr = COPY [[PHI4]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
+  ; CHECK-NEXT:   [[COPY67:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   [[COPY68:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
   ; CHECK-NEXT:   t2Bcc %bb.7, 1 /* CC::ne */, $cpsr
   ; CHECK-NEXT:   t2B %bb.8, 14 /* CC::al */, $noreg
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.8:
   ; CHECK-NEXT:   successors: %bb.9(0x80000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[PHI6:%[0-9]+]]:gprnopc = PHI [[COPY8]], %bb.6, [[COPY11]], %bb.7
-  ; CHECK-NEXT:   [[PHI7:%[0-9]+]]:gprnopc = PHI [[COPY7]], %bb.6, [[COPY12]], %bb.7
+  ; CHECK-NEXT:   [[PHI6:%[0-9]+]]:gprnopc = PHI [[COPY26]], %bb.6, [[COPY47]], %bb.7
+  ; CHECK-NEXT:   [[PHI7:%[0-9]+]]:gprnopc = PHI [[COPY25]], %bb.6, [[COPY48]], %bb.7
   ; CHECK-NEXT:   [[PHI8:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, [[VADDS]], %bb.7
   ; CHECK-NEXT:   [[PHI9:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[VMULS1]], %bb.7
-  ; CHECK-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI9]], [[PHI8]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI9]], [[PHI8]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.9:
   ; CHECK-NEXT:   successors: %bb.4(0x80000000)
@@ -427,8 +427,8 @@ body:             |
   ; CHECK-NEXT:   [[PHI12:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
   ; CHECK-NEXT:   [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI10]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
   ; CHECK-NEXT:   [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI11]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
-  ; CHECK-NEXT:   [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS7]], [[VLDRS6]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS2]], [[PHI12]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS7]], [[VLDRS6]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+  ; CHECK-NEXT:   [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS2]], [[PHI12]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
   ; CHECK-NEXT:   t2B %bb.4, 14 /* CC::al */, $noreg
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.for.end:
@@ -491,8 +491,8 @@ body:             |
     %19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
     %20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
     %21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
-    %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
-    %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+    %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+    %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
     %23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
     %7:gpr = COPY %23
     %8:gpr = COPY %20
diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
index 172ff53..e562c4a 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -132,4 +132,17 @@ define i32 @all_true_2_4_i32(<4 x i32> %v) {
   ret i32 %conv3
 }
 
+; Regression test for the intrinsic pattern matcher with nullary intrinsics
+define i64 @other_intrinsic() #0 {
+; CHECK-LABEL: other_intrinsic:
+; CHECK:         .functype other_intrinsic () -> (i64)
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    global.get $push0=, __tls_align
+; CHECK-NEXT:    return $pop0
+entry:
+  %0 = call i64 @llvm.wasm.tls.align.i64()
+  ret i64 %0
+}
+
+attributes #0 = { "target-features"="+atomics" }
 
diff --git a/llvm/test/DebugInfo/KeyInstructions/Generic/loop-unroll-runtime.ll b/llvm/test/DebugInfo/KeyInstructions/Generic/loop-unroll-runtime.ll
index d23afae..abcc566 100644
--- a/llvm/test/DebugInfo/KeyInstructions/Generic/loop-unroll-runtime.ll
+++ b/llvm/test/DebugInfo/KeyInstructions/Generic/loop-unroll-runtime.ll
@@ -5,7 +5,7 @@
 ;; Check atoms are remapped for runtime unrolling.
 
 ; CHECK: for.body.epil:
-; CHECK-NEXT: store i64 %indvars.iv.unr, ptr %p, align 4, !dbg [[G2R1:!.*]]
+; CHECK-NEXT: store i64 %indvars.iv.epil.init, ptr %p, align 4, !dbg [[G2R1:!.*]]
 
 ; CHECK: for.body.epil.1:
 ; CHECK-NEXT: store i64 %indvars.iv.next.epil, ptr %p, align 4, !dbg [[G3R1:!.*]]
diff --git a/llvm/test/DebugInfo/dwarf-complex-int.ll b/llvm/test/DebugInfo/dwarf-complex-int.ll
new file mode 100644
index 0000000..effd0ec
--- /dev/null
+++ b/llvm/test/DebugInfo/dwarf-complex-int.ll
@@ -0,0 +1,59 @@
+; REQUIRES: object-emission
+; RUN: %llc_dwarf %s -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s
+
+;; https://github.com/llvm/llvm-project/issues/140362
+;; Don't assert when emitting a complex integer type in DWARF.
+
+;; C source:
+;; int g;
+;;
+;; void foo(_Complex short c) { __builtin_memmove(&g, (char *)&c, 2); }
+;;
+;; void bar() { foo(0); }
+
+; CHECK: DW_AT_type ([[complex:0x[0-9a-f]+]] "complex")
+
+; CHECK: [[complex]]: DW_TAG_base_type
+; CHECK-NEXT: DW_AT_name        ("complex")
+; CHECK-NEXT: DW_AT_encoding    (0x80)
+; CHECK-NEXT: DW_AT_byte_size   (0x04)
+
+@g = dso_local local_unnamed_addr global i32 0, align 4, !dbg !0
+
+define dso_local void @bar() local_unnamed_addr !dbg !18 {
+entry:
+    #dbg_value(i32 0, !21, !DIExpression(), !27)
+  store i16 0, ptr @g, align 4, !dbg !29
+  ret void, !dbg !30
+}
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!10, !11}
+!llvm.ident = !{!17}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "g", scope: !2, file: !8, line: 1, type: !9, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 22.0.0git", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !4, globals: !7, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "/app/example.cpp", directory: "/app")
+!4 = !{!5}
+!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !6, size: 64)
+!6 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!7 = !{!0}
+!8 = !DIFile(filename: "example.cpp", directory: "/app")
+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!10 = !{i32 7, !"Dwarf Version", i32 5}
+!11 = !{i32 2, !"Debug Info Version", i32 3}
+!17 = !{!"clang version 22.0.0git"}
+!18 = distinct !DISubprogram(name: "bar", linkageName: "bar()", scope: !8, file: !8, line: 5, type: !19, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, keyInstructions: true)
+!19 = !DISubroutineType(types: !20)
+!20 = !{null}
+!21 = !DILocalVariable(name: "c", arg: 1, scope: !22, file: !8, line: 3, type: !25)
+!22 = distinct !DISubprogram(name: "foo", linkageName: "_ZL3fooCs", scope: !8, file: !8, line: 3, type: !23, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !26, keyInstructions: true)
+!23 = !DISubroutineType(types: !24)
+!24 = !{null, !25}
+!25 = !DIBasicType(name: "complex", size: 32, encoding: 128)
+!26 = !{!21}
+!27 = !DILocation(line: 0, scope: !22, inlinedAt: !28)
+!28 = distinct !DILocation(line: 5, column: 14, scope: !18)
+!29 = !DILocation(line: 3, column: 37, scope: !22, inlinedAt: !28, atomGroup: 1, atomRank: 1)
+!30 = !DILocation(line: 5, column: 22, scope: !18, atomGroup: 1, atomRank: 1)
diff --git a/llvm/test/Instrumentation/AddressSanitizer/RISCV/asan-rvv-intrinsics.ll b/llvm/test/Instrumentation/AddressSanitizer/RISCV/asan-rvv-intrinsics.ll
index 919f16b..4b50094 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/RISCV/asan-rvv-intrinsics.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/RISCV/asan-rvv-intrinsics.ll
@@ -180,7 +180,29 @@ define <vscale x 1 x i32> @test_vlseg2_nxv1i32(ptr %base, i64 %vl) sanitize_addr
 ; CHECK-LABEL: @test_vlseg2_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP24:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i64>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP8]], i64 8)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[TMP24:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP25:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[TMP24]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP25]]
 ;
@@ -194,7 +216,29 @@ define <vscale x 1 x i32> @test_vlseg2_mask_nxv1i32(ptr %base, i64 %vl, <vscale
 ; CHECK-LABEL: @test_vlseg2_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP24:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i64>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP8]], i64 8)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[TMP24:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP25:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[TMP24]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP25]]
 ;
@@ -212,7 +256,29 @@ define <vscale x 1 x i32> @test_vlseg3_nxv1i32(ptr %base, i64 %vl) sanitize_addr
 ; CHECK-LABEL: @test_vlseg3_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP36:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i96>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP8]], i64 12)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[TMP36:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP37:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[TMP36]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP37]]
 ;
@@ -226,7 +292,29 @@ define <vscale x 1 x i32> @test_vlseg3_mask_nxv1i32(ptr %base, i64 %vl, <vscale
 ; CHECK-LABEL: @test_vlseg3_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP36:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i96>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP8]], i64 12)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[TMP36:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP37:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[TMP36]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP37]]
 ;
@@ -244,7 +332,29 @@ define <vscale x 1 x i32> @test_vlseg4_nxv1i32(ptr %base, i64 %vl) sanitize_addr
 ; CHECK-LABEL: @test_vlseg4_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP48:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i128>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP8]], i64 16)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[TMP48:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP49:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[TMP48]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP49]]
 ;
@@ -258,7 +368,29 @@ define <vscale x 1 x i32> @test_vlseg4_mask_nxv1i32(ptr %base, i64 %vl, <vscale
 ; CHECK-LABEL: @test_vlseg4_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP48:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i128>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP8]], i64 16)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[TMP48:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP49:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[TMP48]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP49]]
 ;
@@ -276,7 +408,29 @@ define <vscale x 1 x i32> @test_vlseg5_nxv1i32(ptr %base, i64 %vl) sanitize_addr
 ; CHECK-LABEL: @test_vlseg5_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP60:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i160>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP8]], i64 20)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[TMP60:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP61:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[TMP60]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP61]]
 ;
@@ -290,7 +444,29 @@ define <vscale x 1 x i32> @test_vlseg5_mask_nxv1i32(ptr %base, i64 %vl, <vscale
 ; CHECK-LABEL: @test_vlseg5_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP60:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i160>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP8]], i64 20)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[TMP60:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP61:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[TMP60]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP61]]
 ;
@@ -308,7 +484,29 @@ define <vscale x 1 x i32> @test_vlseg6_nxv1i32(ptr %base, i64 %vl) sanitize_addr
 ; CHECK-LABEL: @test_vlseg6_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP72:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i192>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP8]], i64 24)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[TMP72:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP73:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[TMP72]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP73]]
 ;
@@ -322,7 +520,29 @@ define <vscale x 1 x i32> @test_vlseg6_mask_nxv1i32(ptr %base, i64 %vl, <vscale
 ; CHECK-LABEL: @test_vlseg6_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP72:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i192>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP8]], i64 24)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[TMP72:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP73:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[TMP72]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP73]]
 ;
@@ -340,7 +560,29 @@ define <vscale x 1 x i32> @test_vlseg7_nxv1i32(ptr %base, i64 %vl) sanitize_addr
 ; CHECK-LABEL: @test_vlseg7_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP84:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i224>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP8]], i64 28)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[TMP84:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP85:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[TMP84]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP85]]
 ;
@@ -354,7 +596,29 @@ define <vscale x 1 x i32> @test_vlseg7_mask_nxv1i32(ptr %base, i64 %vl, <vscale
 ; CHECK-LABEL: @test_vlseg7_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP84:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i224>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP8]], i64 28)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[TMP84:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP85:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[TMP84]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP85]]
 ;
@@ -372,7 +636,29 @@ define <vscale x 1 x i32> @test_vlseg8_nxv1i32(ptr %base, i64 %vl) sanitize_addr
 ; CHECK-LABEL: @test_vlseg8_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP96:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i256>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP8]], i64 32)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[TMP96:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP97:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[TMP96]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP97]]
 ;
@@ -386,7 +672,29 @@ define <vscale x 1 x i32> @test_vlseg8_mask_nxv1i32(ptr %base, i64 %vl, <vscale
 ; CHECK-LABEL: @test_vlseg8_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP96:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i256>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP8]], i64 32)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[TMP96:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP97:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[TMP96]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP97]]
 ;
@@ -404,7 +712,29 @@ define void @test_vsseg2_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>,
 ; CHECK-LABEL: @test_vsseg2_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsseg2.triscv.vector.tuple_nxv4i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i64>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP8]], i64 8)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsseg2.triscv.vector.tuple_nxv4i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -416,7 +746,29 @@ define void @test_vsseg2_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x
 ; CHECK-LABEL: @test_vsseg2_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i64>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP8]], i64 8)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -432,7 +784,29 @@ define void @test_vsseg3_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>,
 ; CHECK-LABEL: @test_vsseg3_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsseg3.triscv.vector.tuple_nxv4i8_3t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i96>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP8]], i64 12)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsseg3.triscv.vector.tuple_nxv4i8_3t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -444,7 +818,29 @@ define void @test_vsseg3_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x
 ; CHECK-LABEL: @test_vsseg3_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i96>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP8]], i64 12)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -460,7 +856,29 @@ define void @test_vsseg4_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>,
 ; CHECK-LABEL: @test_vsseg4_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv4i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i128>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP8]], i64 16)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv4i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -472,7 +890,29 @@ define void @test_vsseg4_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x
 ; CHECK-LABEL: @test_vsseg4_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i128>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP8]], i64 16)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -488,7 +928,29 @@ define void @test_vsseg5_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>,
 ; CHECK-LABEL: @test_vsseg5_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsseg5.triscv.vector.tuple_nxv4i8_5t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i160>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP8]], i64 20)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsseg5.triscv.vector.tuple_nxv4i8_5t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -500,7 +962,29 @@ define void @test_vsseg5_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x
 ; CHECK-LABEL: @test_vsseg5_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i160>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP8]], i64 20)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -516,7 +1000,29 @@ define void @test_vsseg6_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>,
 ; CHECK-LABEL: @test_vsseg6_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsseg6.triscv.vector.tuple_nxv4i8_6t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i192>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP8]], i64 24)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsseg6.triscv.vector.tuple_nxv4i8_6t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -528,7 +1034,29 @@ define void @test_vsseg6_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x
 ; CHECK-LABEL: @test_vsseg6_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i192>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP8]], i64 24)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -544,7 +1072,29 @@ define void @test_vsseg7_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>,
 ; CHECK-LABEL: @test_vsseg7_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsseg7.triscv.vector.tuple_nxv4i8_7t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i224>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP8]], i64 28)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsseg7.triscv.vector.tuple_nxv4i8_7t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -556,7 +1106,29 @@ define void @test_vsseg7_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x
 ; CHECK-LABEL: @test_vsseg7_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i224>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP8]], i64 28)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -572,7 +1144,29 @@ define void @test_vsseg8_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>,
 ; CHECK-LABEL: @test_vsseg8_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv4i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i256>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP8]], i64 32)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv4i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -584,7 +1178,29 @@ define void @test_vsseg8_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x
 ; CHECK-LABEL: @test_vsseg8_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP10:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP9:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP9]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 1 x i256>, ptr [[BASE:%.*]], i64 0, i64 [[IV]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP8]], i64 32)
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -792,7 +1408,30 @@ define <vscale x 1 x i32> @test_vlsseg2_nxv1i32(ptr %base, i64 %offset, i64 %vl)
 ; CHECK-LABEL: @test_vlsseg2_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP24:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlsseg2.triscv.vector.tuple_nxv4i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP9]], i64 8)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP24:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlsseg2.triscv.vector.tuple_nxv4i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP25:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[TMP24]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP25]]
 ;
@@ -806,7 +1445,30 @@ define <vscale x 1 x i32> @test_vlsseg2_mask_nxv1i32(ptr %base, i64 %offset, i64
 ; CHECK-LABEL: @test_vlsseg2_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP24:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlsseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP9]], i64 8)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP24:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlsseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP25:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[TMP24]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP25]]
 ;
@@ -824,7 +1486,30 @@ define <vscale x 1 x i32> @test_vlsseg3_nxv1i32(ptr %base, i64 %offset, i64 %vl)
 ; CHECK-LABEL: @test_vlsseg3_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP36:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlsseg3.triscv.vector.tuple_nxv4i8_3t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP9]], i64 12)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP36:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlsseg3.triscv.vector.tuple_nxv4i8_3t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP37:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[TMP36]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP37]]
 ;
@@ -838,7 +1523,30 @@ define <vscale x 1 x i32> @test_vlsseg3_mask_nxv1i32(ptr %base, i64 %offset, i64
 ; CHECK-LABEL: @test_vlsseg3_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP36:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlsseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP9]], i64 12)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP36:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlsseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP37:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[TMP36]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP37]]
 ;
@@ -856,7 +1564,30 @@ define <vscale x 1 x i32> @test_vlsseg4_nxv1i32(ptr %base, i64 %offset, i64 %vl)
 ; CHECK-LABEL: @test_vlsseg4_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP48:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlsseg4.triscv.vector.tuple_nxv4i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP9]], i64 16)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP48:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlsseg4.triscv.vector.tuple_nxv4i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP49:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[TMP48]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP49]]
 ;
@@ -870,7 +1601,30 @@ define <vscale x 1 x i32> @test_vlsseg4_mask_nxv1i32(ptr %base, i64 %offset, i64
 ; CHECK-LABEL: @test_vlsseg4_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP48:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlsseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP9]], i64 16)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP48:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlsseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP49:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[TMP48]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP49]]
 ;
@@ -888,7 +1642,30 @@ define <vscale x 1 x i32> @test_vlsseg5_nxv1i32(ptr %base, i64 %offset, i64 %vl)
 ; CHECK-LABEL: @test_vlsseg5_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP60:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlsseg5.triscv.vector.tuple_nxv4i8_5t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP9]], i64 20)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP60:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlsseg5.triscv.vector.tuple_nxv4i8_5t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP61:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[TMP60]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP61]]
 ;
@@ -902,7 +1679,30 @@ define <vscale x 1 x i32> @test_vlsseg5_mask_nxv1i32(ptr %base, i64 %offset, i64
 ; CHECK-LABEL: @test_vlsseg5_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP60:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlsseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP9]], i64 20)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP60:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlsseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP61:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[TMP60]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP61]]
 ;
@@ -920,7 +1720,30 @@ define <vscale x 1 x i32> @test_vlsseg6_nxv1i32(ptr %base, i64 %offset, i64 %vl)
 ; CHECK-LABEL: @test_vlsseg6_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP72:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlsseg6.triscv.vector.tuple_nxv4i8_6t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP9]], i64 24)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP72:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlsseg6.triscv.vector.tuple_nxv4i8_6t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP73:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[TMP72]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP73]]
 ;
@@ -934,7 +1757,30 @@ define <vscale x 1 x i32> @test_vlsseg6_mask_nxv1i32(ptr %base, i64 %offset, i64
 ; CHECK-LABEL: @test_vlsseg6_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP72:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlsseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP9]], i64 24)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP72:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlsseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP73:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[TMP72]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP73]]
 ;
@@ -952,7 +1798,30 @@ define <vscale x 1 x i32> @test_vlsseg7_nxv1i32(ptr %base, i64 %offset, i64 %vl)
 ; CHECK-LABEL: @test_vlsseg7_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP84:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlsseg7.triscv.vector.tuple_nxv4i8_7t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP9]], i64 28)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP84:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlsseg7.triscv.vector.tuple_nxv4i8_7t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP85:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[TMP84]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP85]]
 ;
@@ -966,7 +1835,30 @@ define <vscale x 1 x i32> @test_vlsseg7_mask_nxv1i32(ptr %base, i64 %offset, i64
 ; CHECK-LABEL: @test_vlsseg7_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP84:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlsseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP9]], i64 28)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP84:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlsseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP85:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[TMP84]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP85]]
 ;
@@ -984,7 +1876,30 @@ define <vscale x 1 x i32> @test_vlsseg8_nxv1i32(ptr %base, i64 %offset, i64 %vl)
 ; CHECK-LABEL: @test_vlsseg8_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP96:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlsseg8.triscv.vector.tuple_nxv4i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP9]], i64 32)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP96:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlsseg8.triscv.vector.tuple_nxv4i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP97:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[TMP96]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP97]]
 ;
@@ -998,7 +1913,30 @@ define <vscale x 1 x i32> @test_vlsseg8_mask_nxv1i32(ptr %base, i64 %offset, i64
 ; CHECK-LABEL: @test_vlsseg8_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP96:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlsseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP9]], i64 32)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP96:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlsseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP97:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[TMP96]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP97]]
 ;
@@ -1016,7 +1954,30 @@ define void @test_vssseg2_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>
 ; CHECK-LABEL: @test_vssseg2_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vssseg2.triscv.vector.tuple_nxv4i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP9]], i64 8)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    tail call void @llvm.riscv.vssseg2.triscv.vector.tuple_nxv4i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -1028,7 +1989,30 @@ define void @test_vssseg2_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4
 ; CHECK-LABEL: @test_vssseg2_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vssseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP9]], i64 8)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    tail call void @llvm.riscv.vssseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -1044,7 +2028,30 @@ define void @test_vssseg3_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>
 ; CHECK-LABEL: @test_vssseg3_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vssseg3.triscv.vector.tuple_nxv4i8_3t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP9]], i64 12)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    tail call void @llvm.riscv.vssseg3.triscv.vector.tuple_nxv4i8_3t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -1056,7 +2063,30 @@ define void @test_vssseg3_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4
 ; CHECK-LABEL: @test_vssseg3_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vssseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP9]], i64 12)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    tail call void @llvm.riscv.vssseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -1072,7 +2102,30 @@ define void @test_vssseg4_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>
 ; CHECK-LABEL: @test_vssseg4_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vssseg4.triscv.vector.tuple_nxv4i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP9]], i64 16)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    tail call void @llvm.riscv.vssseg4.triscv.vector.tuple_nxv4i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -1084,7 +2137,30 @@ define void @test_vssseg4_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4
 ; CHECK-LABEL: @test_vssseg4_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vssseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP9]], i64 16)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    tail call void @llvm.riscv.vssseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -1100,7 +2176,30 @@ define void @test_vssseg5_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>
 ; CHECK-LABEL: @test_vssseg5_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vssseg5.triscv.vector.tuple_nxv4i8_5t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP9]], i64 20)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    tail call void @llvm.riscv.vssseg5.triscv.vector.tuple_nxv4i8_5t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -1112,7 +2211,30 @@ define void @test_vssseg5_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4
 ; CHECK-LABEL: @test_vssseg5_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vssseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP9]], i64 20)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    tail call void @llvm.riscv.vssseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -1128,7 +2250,30 @@ define void @test_vssseg6_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>
 ; CHECK-LABEL: @test_vssseg6_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vssseg6.triscv.vector.tuple_nxv4i8_6t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP9]], i64 24)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    tail call void @llvm.riscv.vssseg6.triscv.vector.tuple_nxv4i8_6t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -1140,7 +2285,30 @@ define void @test_vssseg6_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4
 ; CHECK-LABEL: @test_vssseg6_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vssseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP9]], i64 24)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    tail call void @llvm.riscv.vssseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -1156,7 +2324,30 @@ define void @test_vssseg7_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>
 ; CHECK-LABEL: @test_vssseg7_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vssseg7.triscv.vector.tuple_nxv4i8_7t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP9]], i64 28)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    tail call void @llvm.riscv.vssseg7.triscv.vector.tuple_nxv4i8_7t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -1168,7 +2359,30 @@ define void @test_vssseg7_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4
 ; CHECK-LABEL: @test_vssseg7_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vssseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP9]], i64 28)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    tail call void @llvm.riscv.vssseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -1184,7 +2398,30 @@ define void @test_vssseg8_nxv1i32(target("riscv.vector.tuple", <vscale x 4 x i8>
 ; CHECK-LABEL: @test_vssseg8_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vssseg8.triscv.vector.tuple_nxv4i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP9]], i64 32)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    tail call void @llvm.riscv.vssseg8.triscv.vector.tuple_nxv4i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -1196,7 +2433,30 @@ define void @test_vssseg8_mask_nxv1i32(target("riscv.vector.tuple", <vscale x 4
 ; CHECK-LABEL: @test_vssseg8_mask_nxv1i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vssseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE:%.*]], i64 [[OFFSET:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP11:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP3]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[IV_NEXT:%.*]], [[TMP10:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP10]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[IV]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP9]], i64 32)
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP4]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    tail call void @llvm.riscv.vssseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.i64.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE]], i64 [[OFFSET]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -1687,7 +2947,31 @@ define <vscale x 1 x i32> @test_vloxseg2_nxv1i32_nxv1i16(ptr %base, <vscale x 1
 ; CHECK-LABEL: @test_vloxseg2_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP25:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 8)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP25:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP26:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[TMP25]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP26]]
 ;
@@ -1701,7 +2985,31 @@ define <vscale x 1 x i32> @test_vloxseg2_mask_nxv1i32_nxv1i16(ptr %base, <vscale
 ; CHECK-LABEL: @test_vloxseg2_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP25:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 8)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP25:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP26:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[TMP25]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP26]]
 ;
@@ -1719,7 +3027,31 @@ define <vscale x 1 x i32> @test_vloxseg3_nxv1i32_nxv1i16(ptr %base, <vscale x 1
 ; CHECK-LABEL: @test_vloxseg3_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP37:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 12)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP37:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP38:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[TMP37]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP38]]
 ;
@@ -1733,7 +3065,31 @@ define <vscale x 1 x i32> @test_vloxseg3_mask_nxv1i32_nxv1i16(ptr %base, <vscale
 ; CHECK-LABEL: @test_vloxseg3_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP37:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 12)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP37:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP38:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[TMP37]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP38]]
 ;
@@ -1751,7 +3107,31 @@ define <vscale x 1 x i32> @test_vloxseg4_nxv1i32_nxv1i16(ptr %base, <vscale x 1
 ; CHECK-LABEL: @test_vloxseg4_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP49:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 16)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP49:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vloxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP50:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[TMP49]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP50]]
 ;
@@ -1765,7 +3145,31 @@ define <vscale x 1 x i32> @test_vloxseg4_mask_nxv1i32_nxv1i16(ptr %base, <vscale
 ; CHECK-LABEL: @test_vloxseg4_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP49:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 16)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP49:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vloxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP50:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[TMP49]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP50]]
 ;
@@ -1783,7 +3187,31 @@ define <vscale x 1 x i32> @test_vloxseg5_nxv1i32_nxv1i16(ptr %base, <vscale x 1
 ; CHECK-LABEL: @test_vloxseg5_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP61:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 20)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP61:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vloxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP62:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[TMP61]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP62]]
 ;
@@ -1797,7 +3225,31 @@ define <vscale x 1 x i32> @test_vloxseg5_mask_nxv1i32_nxv1i16(ptr %base, <vscale
 ; CHECK-LABEL: @test_vloxseg5_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP61:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 20)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP61:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vloxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP62:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[TMP61]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP62]]
 ;
@@ -1815,7 +3267,31 @@ define <vscale x 1 x i32> @test_vloxseg6_nxv1i32_nxv1i16(ptr %base, <vscale x 1
 ; CHECK-LABEL: @test_vloxseg6_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP73:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 24)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP73:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vloxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP74:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[TMP73]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP74]]
 ;
@@ -1829,7 +3305,31 @@ define <vscale x 1 x i32> @test_vloxseg6_mask_nxv1i32_nxv1i16(ptr %base, <vscale
 ; CHECK-LABEL: @test_vloxseg6_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP73:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 24)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP73:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vloxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP74:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[TMP73]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP74]]
 ;
@@ -1847,7 +3347,31 @@ define <vscale x 1 x i32> @test_vloxseg7_nxv1i32_nxv1i16(ptr %base, <vscale x 1
 ; CHECK-LABEL: @test_vloxseg7_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP85:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 28)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP85:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vloxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP86:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[TMP85]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP86]]
 ;
@@ -1861,7 +3385,31 @@ define <vscale x 1 x i32> @test_vloxseg7_mask_nxv1i32_nxv1i16(ptr %base, <vscale
 ; CHECK-LABEL: @test_vloxseg7_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP85:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 28)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP85:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vloxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP86:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[TMP85]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP86]]
 ;
@@ -1879,7 +3427,31 @@ define <vscale x 1 x i32> @test_vloxseg8_nxv1i32_nxv1i16(ptr %base, <vscale x 1
 ; CHECK-LABEL: @test_vloxseg8_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP97:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 32)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP97:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vloxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP98:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[TMP97]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP98]]
 ;
@@ -1893,7 +3465,31 @@ define <vscale x 1 x i32> @test_vloxseg8_mask_nxv1i32_nxv1i16(ptr %base, <vscale
 ; CHECK-LABEL: @test_vloxseg8_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP97:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 32)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP97:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vloxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP98:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[TMP97]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP98]]
 ;
@@ -1911,7 +3507,31 @@ define <vscale x 1 x i32> @test_vluxseg2_nxv1i32_nxv1i16(ptr %base, <vscale x 1
 ; CHECK-LABEL: @test_vluxseg2_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP25:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 8)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP25:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP26:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[TMP25]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP26]]
 ;
@@ -1925,7 +3545,31 @@ define <vscale x 1 x i32> @test_vluxseg2_mask_nxv1i32_nxv1i16(ptr %base, <vscale
 ; CHECK-LABEL: @test_vluxseg2_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP25:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 8)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP25:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP26:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[TMP25]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP26]]
 ;
@@ -1943,7 +3587,31 @@ define <vscale x 1 x i32> @test_vluxseg3_nxv1i32_nxv1i16(ptr %base, <vscale x 1
 ; CHECK-LABEL: @test_vluxseg3_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP37:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 12)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP37:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP38:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[TMP37]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP38]]
 ;
@@ -1957,7 +3625,31 @@ define <vscale x 1 x i32> @test_vluxseg3_mask_nxv1i32_nxv1i16(ptr %base, <vscale
 ; CHECK-LABEL: @test_vluxseg3_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP37:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 12)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP37:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP38:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[TMP37]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP38]]
 ;
@@ -1975,7 +3667,31 @@ define <vscale x 1 x i32> @test_vluxseg4_nxv1i32_nxv1i16(ptr %base, <vscale x 1
 ; CHECK-LABEL: @test_vluxseg4_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP49:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 16)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP49:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP50:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[TMP49]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP50]]
 ;
@@ -1989,7 +3705,31 @@ define <vscale x 1 x i32> @test_vluxseg4_mask_nxv1i32_nxv1i16(ptr %base, <vscale
 ; CHECK-LABEL: @test_vluxseg4_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP49:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 16)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP49:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP50:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[TMP49]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP50]]
 ;
@@ -2007,7 +3747,31 @@ define <vscale x 1 x i32> @test_vluxseg5_nxv1i32_nxv1i16(ptr %base, <vscale x 1
 ; CHECK-LABEL: @test_vluxseg5_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP61:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 20)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP61:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vluxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP62:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[TMP61]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP62]]
 ;
@@ -2021,7 +3785,31 @@ define <vscale x 1 x i32> @test_vluxseg5_mask_nxv1i32_nxv1i16(ptr %base, <vscale
 ; CHECK-LABEL: @test_vluxseg5_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP61:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 20)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP61:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vluxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP62:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[TMP61]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP62]]
 ;
@@ -2039,7 +3827,31 @@ define <vscale x 1 x i32> @test_vluxseg6_nxv1i32_nxv1i16(ptr %base, <vscale x 1
 ; CHECK-LABEL: @test_vluxseg6_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP73:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 24)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP73:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vluxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP74:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[TMP73]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP74]]
 ;
@@ -2053,7 +3865,31 @@ define <vscale x 1 x i32> @test_vluxseg6_mask_nxv1i32_nxv1i16(ptr %base, <vscale
 ; CHECK-LABEL: @test_vluxseg6_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP73:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 24)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP73:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vluxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP74:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[TMP73]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP74]]
 ;
@@ -2071,7 +3907,31 @@ define <vscale x 1 x i32> @test_vluxseg7_nxv1i32_nxv1i16(ptr %base, <vscale x 1
 ; CHECK-LABEL: @test_vluxseg7_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP85:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 28)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP85:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vluxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP86:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[TMP85]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP86]]
 ;
@@ -2085,7 +3945,31 @@ define <vscale x 1 x i32> @test_vluxseg7_mask_nxv1i32_nxv1i16(ptr %base, <vscale
 ; CHECK-LABEL: @test_vluxseg7_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP85:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 28)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP85:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vluxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP86:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[TMP85]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP86]]
 ;
@@ -2103,7 +3987,31 @@ define <vscale x 1 x i32> @test_vluxseg8_nxv1i32_nxv1i16(ptr %base, <vscale x 1
 ; CHECK-LABEL: @test_vluxseg8_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP97:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 32)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP97:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vluxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    [[TMP98:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[TMP97]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP98]]
 ;
@@ -2117,7 +4025,31 @@ define <vscale x 1 x i32> @test_vluxseg8_mask_nxv1i32_nxv1i16(ptr %base, <vscale
 ; CHECK-LABEL: @test_vluxseg8_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    [[TMP97:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 1, i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_loadN(i64 [[TMP10]], i64 32)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    [[TMP97:%.*]] = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vluxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) poison, ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 1, i64 5)
 ; CHECK-NEXT:    [[TMP98:%.*]] = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[TMP97]], i32 1)
 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[TMP98]]
 ;
@@ -2135,7 +4067,31 @@ define void @test_vsoxseg2_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale
 ; CHECK-LABEL: @test_vsoxseg2_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 8)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2147,7 +4103,31 @@ define void @test_vsoxseg2_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs
 ; CHECK-LABEL: @test_vsoxseg2_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 8)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2163,7 +4143,31 @@ define void @test_vsoxseg3_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale
 ; CHECK-LABEL: @test_vsoxseg3_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 12)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2175,7 +4179,31 @@ define void @test_vsoxseg3_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs
 ; CHECK-LABEL: @test_vsoxseg3_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 12)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2191,7 +4219,31 @@ define void @test_vsoxseg4_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale
 ; CHECK-LABEL: @test_vsoxseg4_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 16)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2203,7 +4255,31 @@ define void @test_vsoxseg4_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs
 ; CHECK-LABEL: @test_vsoxseg4_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 16)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2219,7 +4295,31 @@ define void @test_vsoxseg5_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale
 ; CHECK-LABEL: @test_vsoxseg5_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 20)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2231,7 +4331,31 @@ define void @test_vsoxseg5_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs
 ; CHECK-LABEL: @test_vsoxseg5_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 20)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2247,7 +4371,31 @@ define void @test_vsoxseg6_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale
 ; CHECK-LABEL: @test_vsoxseg6_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 24)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2259,7 +4407,31 @@ define void @test_vsoxseg6_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs
 ; CHECK-LABEL: @test_vsoxseg6_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 24)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2275,7 +4447,31 @@ define void @test_vsoxseg7_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale
 ; CHECK-LABEL: @test_vsoxseg7_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 28)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2287,7 +4483,31 @@ define void @test_vsoxseg7_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs
 ; CHECK-LABEL: @test_vsoxseg7_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 28)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2303,7 +4523,31 @@ define void @test_vsoxseg8_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale
 ; CHECK-LABEL: @test_vsoxseg8_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 32)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2315,7 +4559,31 @@ define void @test_vsoxseg8_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs
 ; CHECK-LABEL: @test_vsoxseg8_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 32)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2331,7 +4599,31 @@ define void @test_vsuxseg2_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale
 ; CHECK-LABEL: @test_vsuxseg2_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 8)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsuxseg2.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2343,7 +4635,31 @@ define void @test_vsuxseg2_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs
 ; CHECK-LABEL: @test_vsuxseg2_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 8)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg2.mask.triscv.vector.tuple_nxv4i8_2t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2359,7 +4675,31 @@ define void @test_vsuxseg3_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale
 ; CHECK-LABEL: @test_vsuxseg3_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 12)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsuxseg3.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2371,7 +4711,31 @@ define void @test_vsuxseg3_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs
 ; CHECK-LABEL: @test_vsuxseg3_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 12)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg3.mask.triscv.vector.tuple_nxv4i8_3t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2387,7 +4751,31 @@ define void @test_vsuxseg4_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale
 ; CHECK-LABEL: @test_vsuxseg4_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 16)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsuxseg4.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2399,7 +4787,31 @@ define void @test_vsuxseg4_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs
 ; CHECK-LABEL: @test_vsuxseg4_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 16)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg4.mask.triscv.vector.tuple_nxv4i8_4t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2415,7 +4827,31 @@ define void @test_vsuxseg5_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale
 ; CHECK-LABEL: @test_vsuxseg5_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 20)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsuxseg5.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2427,7 +4863,31 @@ define void @test_vsuxseg5_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs
 ; CHECK-LABEL: @test_vsuxseg5_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 20)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg5.mask.triscv.vector.tuple_nxv4i8_5t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2443,7 +4903,31 @@ define void @test_vsuxseg6_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale
 ; CHECK-LABEL: @test_vsuxseg6_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 24)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsuxseg6.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2455,7 +4939,31 @@ define void @test_vsuxseg6_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs
 ; CHECK-LABEL: @test_vsuxseg6_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 24)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg6.mask.triscv.vector.tuple_nxv4i8_6t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2471,7 +4979,31 @@ define void @test_vsuxseg7_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale
 ; CHECK-LABEL: @test_vsuxseg7_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 28)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsuxseg7.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2483,7 +5015,31 @@ define void @test_vsuxseg7_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs
 ; CHECK-LABEL: @test_vsuxseg7_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 28)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg7.mask.triscv.vector.tuple_nxv4i8_7t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2499,7 +5055,31 @@ define void @test_vsuxseg8_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vscale
 ; CHECK-LABEL: @test_vsuxseg8_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> splat (i1 true), i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 32)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsuxseg8.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -2511,7 +5091,31 @@ define void @test_vsuxseg8_mask_nxv1i32_nxv1i16(target("riscv.vector.tuple", <vs
 ; CHECK-LABEL: @test_vsuxseg8_mask_nxv1i32_nxv1i16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8
-; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE:%.*]], <vscale x 1 x i16> [[INDEX:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <vscale x 1 x i16> [[INDEX:%.*]] to <vscale x 1 x i64>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], <vscale x 1 x i64> [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VL:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[VL]], i64 [[TMP5]])
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP4]] ], [ [[IV_NEXT:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 1 x i1> [[MASK:%.*]], i64 [[IV]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP11]]
+; CHECK:       8:
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <vscale x 1 x ptr> [[TMP2]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64
+; CHECK-NEXT:    call void @__asan_storeN(i64 [[TMP10]], i64 32)
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; CHECK:       .split.split:
+; CHECK-NEXT:    br label [[TMP12]]
+; CHECK:       12:
+; CHECK-NEXT:    tail call void @llvm.riscv.vsoxseg8.mask.triscv.vector.tuple_nxv4i8_8t.p0.nxv1i16.nxv1i1.i64(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) [[VAL:%.*]], ptr [[BASE]], <vscale x 1 x i16> [[INDEX]], <vscale x 1 x i1> [[MASK]], i64 [[VL]], i64 5)
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Instrumentation/AllocToken/basic.ll b/llvm/test/Instrumentation/AllocToken/basic.ll
new file mode 100644
index 0000000..099d37d
--- /dev/null
+++ b/llvm/test/Instrumentation/AllocToken/basic.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=inferattrs,alloc-token -alloc-token-mode=increment -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+declare ptr @malloc(i64)
+declare ptr @calloc(i64, i64)
+declare ptr @realloc(ptr, i64)
+declare ptr @_Znwm(i64)
+declare ptr @_Znam(i64)
+declare void @free(ptr)
+declare void @_ZdlPv(ptr)
+declare i32 @foobar(i64)
+
+; Test that malloc/calloc/realloc calls are rewritten to __alloc_token_* with an incrementing token ID appended as the last argument
+define ptr @test_basic_rewriting() sanitize_alloc_token {
+; CHECK-LABEL: define ptr @test_basic_rewriting(
+; CHECK-SAME: ) #[[ATTR5:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__alloc_token_malloc(i64 64, i64 0)
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr @__alloc_token_calloc(i64 8, i64 8, i64 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = call ptr @__alloc_token_realloc(ptr [[TMP0]], i64 128, i64 2)
+; CHECK-NEXT:    ret ptr [[TMP2]]
+;
+entry:
+  %ptr1 = call ptr @malloc(i64 64)
+  %ptr2 = call ptr @calloc(i64 8, i64 8)
+  %ptr3 = call ptr @realloc(ptr %ptr1, i64 128)
+  ret ptr %ptr3
+}
+
+; Test that C++ operator new/new[] (_Znwm/_Znam) calls are rewritten; token IDs continue from the previous function
+define ptr @test_cpp_operators() sanitize_alloc_token {
+; CHECK-LABEL: define ptr @test_cpp_operators(
+; CHECK-SAME: ) #[[ATTR5]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__alloc_token__Znwm(i64 32, i64 3)
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr @__alloc_token__Znam(i64 64, i64 4)
+; CHECK-NEXT:    ret ptr [[TMP0]]
+;
+entry:
+  %ptr1 = call ptr @_Znwm(i64 32)
+  %ptr2 = call ptr @_Znam(i64 64)
+  ret ptr %ptr1
+}
+
+; Functions without sanitize_alloc_token do not get instrumented
+define ptr @without_attribute() {
+; CHECK-LABEL: define ptr @without_attribute() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR:%.*]] = call ptr @malloc(i64 16)
+; CHECK-NEXT:    ret ptr [[PTR]]
+;
+entry:
+  %ptr = call ptr @malloc(i64 16)
+  ret ptr %ptr
+}
+
+; Test that free/delete are untouched
+define void @test_free_untouched(ptr %ptr) sanitize_alloc_token {
+; CHECK-LABEL: define void @test_free_untouched(
+; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR5]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @free(ptr [[PTR]])
+; CHECK-NEXT:    call void @_ZdlPv(ptr [[PTR]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @free(ptr %ptr)
+  call void @_ZdlPv(ptr %ptr)
+  ret void
+}
+
+; Non-allocation functions are untouched
+define i32 @no_allocations(i32 %x) sanitize_alloc_token {
+; CHECK-LABEL: define i32 @no_allocations(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR5]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @foobar(i64 42)
+; CHECK-NEXT:    ret i32 [[RESULT]]
+;
+entry:
+  %result = call i32 @foobar(i64 42)
+  ret i32 %result
+}
+
+; Test that tail calls are preserved
+define ptr @test_tail_call_preserved() sanitize_alloc_token {
+; CHECK-LABEL: define ptr @test_tail_call_preserved(
+; CHECK-SAME: ) #[[ATTR5]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr @__alloc_token_malloc(i64 42, i64 5)
+; CHECK-NEXT:    ret ptr [[TMP0]]
+;
+entry:
+  %result = tail call ptr @malloc(i64 42)
+  ret ptr %result
+}
diff --git a/llvm/test/Instrumentation/AllocToken/basic32.ll b/llvm/test/Instrumentation/AllocToken/basic32.ll
new file mode 100644
index 0000000..944a452
--- /dev/null
+++ b/llvm/test/Instrumentation/AllocToken/basic32.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=inferattrs,alloc-token -alloc-token-mode=increment -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+
+declare ptr @malloc(i32)
+declare ptr @_Znwm(i32)
+
+define ptr @test_basic_rewriting() sanitize_alloc_token {
+; CHECK-LABEL: define ptr @test_basic_rewriting(
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__alloc_token_malloc(i32 64, i32 0)
+; CHECK-NEXT:    ret ptr [[TMP0]]
+;
+entry:
+  %ptr1 = call ptr @malloc(i32 64)
+  ret ptr %ptr1
+}
+
+define ptr @test_cpp_operators() sanitize_alloc_token {
+; CHECK-LABEL: define ptr @test_cpp_operators(
+; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__alloc_token__Znwm(i32 32, i32 1)
+; CHECK-NEXT:    ret ptr [[TMP0]]
+;
+entry:
+  %ptr1 = call ptr @_Znwm(i32 32)
+  ret ptr %ptr1
+}
diff --git a/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll b/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll
new file mode 100644
index 0000000..5f08552
--- /dev/null
+++ b/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; Test for special libfuncs not automatically considered allocation functions.
+;
+; RUN: opt < %s -passes=inferattrs,alloc-token -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+declare {ptr, i64} @__size_returning_new(i64)
+
+define ptr @test_extra_libfuncs() sanitize_alloc_token {
+; CHECK-LABEL: define ptr @test_extra_libfuncs(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call { ptr, i64 } @__alloc_token___size_returning_new(i64 10, i64 2689373973731826898), !alloc_token [[META0:![0-9]+]]
+; CHECK-NEXT:    [[PTR1:%.*]] = extractvalue { ptr, i64 } [[TMP0]], 0
+; CHECK-NEXT:    ret ptr [[PTR1]]
+;
+entry:
+  %srn = call {ptr, i64} @__size_returning_new(i64 10), !alloc_token !0
+  %ptr1  = extractvalue {ptr, i64} %srn, 0
+  ret ptr %ptr1
+}
+
+declare ptr @_Znwm(i64) nobuiltin allocsize(0)
+declare ptr @_Znam(i64) nobuiltin allocsize(0)
+
+define ptr @test_replaceable_new() sanitize_alloc_token {
+; CHECK-LABEL: define ptr @test_replaceable_new(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__alloc_token__Znwm(i64 32, i64 2689373973731826898), !alloc_token [[META0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr @__alloc_token__Znam(i64 64, i64 2689373973731826898), !alloc_token [[META0]]
+; CHECK-NEXT:    ret ptr [[TMP0]]
+;
+entry:
+  %ptr1 = call ptr @_Znwm(i64 32), !alloc_token !0
+  %ptr2 = call ptr @_Znam(i64 64), !alloc_token !0
+  ret ptr %ptr1
+}
+
+!0 = !{!"int"}
+;.
+; CHECK: [[META0]] = !{!"int"}
+;.
diff --git a/llvm/test/Instrumentation/AllocToken/fast.ll b/llvm/test/Instrumentation/AllocToken/fast.ll
new file mode 100644
index 0000000..19a3ef6
--- /dev/null
+++ b/llvm/test/Instrumentation/AllocToken/fast.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=inferattrs,alloc-token -alloc-token-mode=increment -alloc-token-fast-abi -alloc-token-max=3 -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+declare ptr @malloc(i64)
+declare ptr @calloc(i64, i64)
+declare ptr @realloc(ptr, i64)
+declare ptr @_Znwm(i64)
+declare ptr @_Znam(i64)
+
+; Test basic allocation call rewriting
+define ptr @test_basic_rewriting() sanitize_alloc_token {
+; CHECK-LABEL: define ptr @test_basic_rewriting(
+; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR1:%.*]] = call ptr @__alloc_token_0_malloc(i64 64)
+; CHECK-NEXT:    [[PTR2:%.*]] = call ptr @__alloc_token_1_calloc(i64 8, i64 8)
+; CHECK-NEXT:    [[PTR3:%.*]] = call ptr @__alloc_token_2_realloc(ptr [[PTR1]], i64 128)
+; CHECK-NEXT:    ret ptr [[PTR3]]
+;
+entry:
+  %ptr1 = call ptr @malloc(i64 64)
+  %ptr2 = call ptr @calloc(i64 8, i64 8)
+  %ptr3 = call ptr @realloc(ptr %ptr1, i64 128)
+  ret ptr %ptr3
+}
+
+; Test C++ operator rewriting
+define ptr @test_cpp_operators() sanitize_alloc_token {
+; CHECK-LABEL: define ptr @test_cpp_operators(
+; CHECK-SAME: ) #[[ATTR4]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR1:%.*]] = call ptr @__alloc_token_0__Znwm(i64 32)
+; CHECK-NEXT:    [[PTR2:%.*]] = call ptr @__alloc_token_1__Znam(i64 64)
+; CHECK-NEXT:    ret ptr [[PTR1]]
+;
+entry:
+  %ptr1 = call ptr @_Znwm(i64 32)
+  %ptr2 = call ptr @_Znam(i64 64)
+  ret ptr %ptr1
+}
diff --git a/llvm/test/Instrumentation/AllocToken/ignore.ll b/llvm/test/Instrumentation/AllocToken/ignore.ll
new file mode 100644
index 0000000..b92a920
--- /dev/null
+++ b/llvm/test/Instrumentation/AllocToken/ignore.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; Test for all allocation functions that should be ignored by default.
+;
+; RUN: opt < %s -passes=inferattrs,alloc-token -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+declare ptr @strdup(ptr)
+declare ptr @__strdup(ptr)
+declare ptr @strndup(ptr, i64)
+declare ptr @__strndup(ptr, i64)
+
+define ptr @test_ignored_allocation_functions(ptr %ptr) sanitize_alloc_token {
+; CHECK-LABEL: define ptr @test_ignored_allocation_functions(
+; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR1:%.*]] = call ptr @strdup(ptr [[PTR]])
+; CHECK-NEXT:    [[PTR2:%.*]] = call ptr @__strdup(ptr [[PTR]])
+; CHECK-NEXT:    [[PTR3:%.*]] = call ptr @strndup(ptr [[PTR]], i64 42)
+; CHECK-NEXT:    [[PTR4:%.*]] = call ptr @__strndup(ptr [[PTR]], i64 42)
+; CHECK-NEXT:    ret ptr [[PTR1]]
+;
+entry:
+  %ptr1 = call ptr @strdup(ptr %ptr)
+  %ptr2 = call ptr @__strdup(ptr %ptr)
+  %ptr3 = call ptr @strndup(ptr %ptr, i64 42)
+  %ptr4 = call ptr @__strndup(ptr %ptr, i64 42)
+  ret ptr %ptr1
+}
diff --git a/llvm/test/Instrumentation/AllocToken/invoke.ll b/llvm/test/Instrumentation/AllocToken/invoke.ll
new file mode 100644
index 0000000..347c99a
--- /dev/null
+++ b/llvm/test/Instrumentation/AllocToken/invoke.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=inferattrs,alloc-token -alloc-token-mode=increment -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define ptr @test_invoke_malloc() sanitize_alloc_token personality ptr @__gxx_personality_v0 {
+; CHECK-LABEL: define ptr @test_invoke_malloc(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] personality ptr @__gxx_personality_v0 {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = invoke ptr @__alloc_token_malloc(i64 64, i64 0)
+; CHECK-NEXT:            to label %[[NORMAL:.*]] unwind label %[[CLEANUP:.*]]
+; CHECK:       [[NORMAL]]:
+; CHECK-NEXT:    ret ptr [[TMP0]]
+; CHECK:       [[CLEANUP]]:
+; CHECK-NEXT:    [[LP:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            cleanup
+; CHECK-NEXT:    ret ptr null
+;
+entry:
+  %ptr = invoke ptr @malloc(i64 64) to label %normal unwind label %cleanup
+
+normal:
+  ret ptr %ptr
+
+cleanup:
+  %lp = landingpad { ptr, i32 } cleanup
+  ret ptr null
+}
+
+define ptr @test_invoke_operator_new() sanitize_alloc_token personality ptr @__gxx_personality_v0 {
+; CHECK-LABEL: define ptr @test_invoke_operator_new(
+; CHECK-SAME: ) #[[ATTR0]] personality ptr @__gxx_personality_v0 {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = invoke ptr @__alloc_token__Znwm(i64 32, i64 1)
+; CHECK-NEXT:            to label %[[NORMAL:.*]] unwind label %[[CLEANUP:.*]]
+; CHECK:       [[NORMAL]]:
+; CHECK-NEXT:    ret ptr [[TMP0]]
+; CHECK:       [[CLEANUP]]:
+; CHECK-NEXT:    [[LP:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            cleanup
+; CHECK-NEXT:    ret ptr null
+;
+entry:
+  %ptr = invoke ptr @_Znwm(i64 32) to label %normal unwind label %cleanup
+
+normal:
+  ret ptr %ptr
+
+cleanup:
+  %lp = landingpad { ptr, i32 } cleanup
+  ret ptr null
+}
+
+; Test complex exception flow with multiple invoke allocations
+define ptr @test_complex_invoke_flow() sanitize_alloc_token personality ptr @__gxx_personality_v0 {
+; CHECK-LABEL: define ptr @test_complex_invoke_flow(
+; CHECK-SAME: ) #[[ATTR0]] personality ptr @__gxx_personality_v0 {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = invoke ptr @__alloc_token_malloc(i64 16, i64 2)
+; CHECK-NEXT:            to label %[[FIRST_OK:.*]] unwind label %[[CLEANUP1:.*]]
+; CHECK:       [[FIRST_OK]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = invoke ptr @__alloc_token__Znwm(i64 32, i64 3)
+; CHECK-NEXT:            to label %[[SECOND_OK:.*]] unwind label %[[CLEANUP2:.*]]
+; CHECK:       [[SECOND_OK]]:
+; CHECK-NEXT:    ret ptr [[TMP0]]
+; CHECK:       [[CLEANUP1]]:
+; CHECK-NEXT:    [[LP1:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            cleanup
+; CHECK-NEXT:    ret ptr null
+; CHECK:       [[CLEANUP2]]:
+; CHECK-NEXT:    [[LP2:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            cleanup
+; CHECK-NEXT:    ret ptr null
+;
+entry:
+  %ptr1 = invoke ptr @malloc(i64 16) to label %first_ok unwind label %cleanup1
+
+first_ok:
+  %ptr2 = invoke ptr @_Znwm(i64 32) to label %second_ok unwind label %cleanup2
+
+second_ok:
+  ret ptr %ptr1
+
+cleanup1:
+  %lp1 = landingpad { ptr, i32 } cleanup
+  ret ptr null
+
+cleanup2:
+  %lp2 = landingpad { ptr, i32 } cleanup
+  ret ptr null
+}
+
+; Test mixed call/invoke
+define ptr @test_mixed_call_invoke() sanitize_alloc_token personality ptr @__gxx_personality_v0 {
+; CHECK-LABEL: define ptr @test_mixed_call_invoke(
+; CHECK-SAME: ) #[[ATTR0]] personality ptr @__gxx_personality_v0 {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__alloc_token_malloc(i64 8, i64 4)
+; CHECK-NEXT:    [[TMP1:%.*]] = invoke ptr @__alloc_token_malloc(i64 16, i64 5)
+; CHECK-NEXT:            to label %[[NORMAL:.*]] unwind label %[[CLEANUP:.*]]
+; CHECK:       [[NORMAL]]:
+; CHECK-NEXT:    ret ptr [[TMP0]]
+; CHECK:       [[CLEANUP]]:
+; CHECK-NEXT:    [[LP:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            cleanup
+; CHECK-NEXT:    ret ptr null
+;
+entry:
+  %ptr1 = call ptr @malloc(i64 8)
+
+  %ptr2 = invoke ptr @malloc(i64 16) to label %normal unwind label %cleanup
+
+normal:
+  ret ptr %ptr1
+
+cleanup:
+  %lp = landingpad { ptr, i32 } cleanup
+  ret ptr null
+}
+
+declare ptr @malloc(i64)
+declare ptr @_Znwm(i64)
+declare i32 @__gxx_personality_v0(...)
diff --git a/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll b/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll
new file mode 100644
index 0000000..e023ab6b
--- /dev/null
+++ b/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=inferattrs,alloc-token -alloc-token-mode=increment -alloc-token-extended -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+declare ptr @malloc(i64)
+declare ptr @custom_malloc(i64)
+declare ptr @kmalloc(i64, i64)
+
+define ptr @test_libcall() sanitize_alloc_token {
+; CHECK-LABEL: define ptr @test_libcall(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__alloc_token_malloc(i64 64, i64 0)
+; CHECK-NEXT:    ret ptr [[TMP0]]
+;
+entry:
+  %ptr1 = call ptr @malloc(i64 64)
+  ret ptr %ptr1
+}
+
+define ptr @test_libcall_hint() sanitize_alloc_token {
+; CHECK-LABEL: define ptr @test_libcall_hint(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__alloc_token_malloc(i64 64, i64 1), !alloc_token [[META0:![0-9]+]]
+; CHECK-NEXT:    ret ptr [[TMP0]]
+;
+entry:
+  %ptr1 = call ptr @malloc(i64 64), !alloc_token !0
+  ret ptr %ptr1
+}
+
+define ptr @test_nonlibcall_nohint() sanitize_alloc_token {
+; CHECK-LABEL: define ptr @test_nonlibcall_nohint(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR1:%.*]] = call ptr @custom_malloc(i64 8)
+; CHECK-NEXT:    [[PTR2:%.*]] = call ptr @kmalloc(i64 32, i64 0)
+; CHECK-NEXT:    ret ptr [[PTR1]]
+;
+entry:
+  %ptr1 = call ptr @custom_malloc(i64 8)
+  %ptr2 = call ptr @kmalloc(i64 32, i64 0)
+  ret ptr %ptr1
+}
+
+define ptr @test_nonlibcall_hint() sanitize_alloc_token {
+; CHECK-LABEL: define ptr @test_nonlibcall_hint(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__alloc_token_custom_malloc(i64 8, i64 2), !alloc_token [[META0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr @__alloc_token_kmalloc(i64 32, i64 0, i64 3), !alloc_token [[META0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call ptr @__alloc_token_custom_malloc(i64 64, i64 4), !alloc_token [[META0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call ptr @__alloc_token_kmalloc(i64 128, i64 2, i64 5), !alloc_token [[META0]]
+; CHECK-NEXT:    ret ptr [[TMP0]]
+;
+entry:
+  %ptr1 = call ptr @custom_malloc(i64 8), !alloc_token !0
+  %ptr2 = call ptr @kmalloc(i64 32, i64 0), !alloc_token !0
+  %ptr3 = call ptr @custom_malloc(i64 64), !alloc_token !0
+  %ptr4 = call ptr @kmalloc(i64 128, i64 2), !alloc_token !0
+  ret ptr %ptr1
+}
+
+; Functions without sanitize_alloc_token do not get instrumented
+define ptr @without_attribute() {
+; CHECK-LABEL: define ptr @without_attribute() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR1:%.*]] = call ptr @malloc(i64 64), !alloc_token [[META0]]
+; CHECK-NEXT:    [[PTR2:%.*]] = call ptr @custom_malloc(i64 8), !alloc_token [[META0]]
+; CHECK-NEXT:    [[PTR3:%.*]] = call ptr @kmalloc(i64 32, i64 0), !alloc_token [[META0]]
+; CHECK-NEXT:    ret ptr [[PTR1]]
+;
+entry:
+  %ptr1 = call ptr @malloc(i64 64), !alloc_token !0
+  %ptr2 = call ptr @custom_malloc(i64 8), !alloc_token !0
+  %ptr3 = call ptr @kmalloc(i64 32, i64 0), !alloc_token !0
+  ret ptr %ptr1
+}
+
+!0 = !{!"int"}
+;.
+; CHECK: [[META0]] = !{!"int"}
+;.
diff --git a/llvm/test/Instrumentation/AllocToken/remark.ll b/llvm/test/Instrumentation/AllocToken/remark.ll
new file mode 100644
index 0000000..a2404526
--- /dev/null
+++ b/llvm/test/Instrumentation/AllocToken/remark.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=inferattrs,alloc-token -pass-remarks=alloc-token -S 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+declare ptr @malloc(i64)
+
+; CHECK-NOT: remark: <unknown>:0:0: Call to 'malloc' in 'test_has_metadata' without source-level type token
+; CHECK: remark: <unknown>:0:0: Call to 'malloc' in 'test_no_metadata' without source-level type token
+
+define ptr @test_has_metadata() sanitize_alloc_token {
+; CHECK-LABEL: define ptr @test_has_metadata(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__alloc_token_malloc(i64 64, i64 2689373973731826898), !alloc_token [[META0:![0-9]+]]
+; CHECK-NEXT:    ret ptr [[TMP0]]
+;
+entry:
+  %ptr1 = call ptr @malloc(i64 64), !alloc_token !0
+  ret ptr %ptr1
+}
+
+define ptr @test_no_metadata() sanitize_alloc_token {
+; CHECK-LABEL: define ptr @test_no_metadata(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__alloc_token_malloc(i64 32, i64 0)
+; CHECK-NEXT:    ret ptr [[TMP0]]
+;
+entry:
+  %ptr1 = call ptr @malloc(i64 32)
+  ret ptr %ptr1
+}
+
+!0 = !{!"int"}
+;.
+; CHECK: [[META0]] = !{!"int"}
+;.
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/abilist_aggregate.ll b/llvm/test/Instrumentation/DataFlowSanitizer/abilist_aggregate.ll
index 2cf5771..3cab62b 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/abilist_aggregate.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/abilist_aggregate.ll
@@ -13,7 +13,7 @@ define {i1, i7} @functional({i32, i1} %a, [2 x i7] %b) {
 
 define {i1, i7} @call_functional({i32, i1} %a, [2 x i7] %b) {
   ; CHECK-LABEL: @call_functional.dfsan
-  ; CHECK-NEXT: %[[#REG:]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]]
+  ; CHECK-NEXT: %[[#REG:]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]]
   ; CHECK-NEXT: %[[#REG+1]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]]
   ; CHECK-NEXT: %[[#REG+2]] = extractvalue { i8, i8 } %[[#REG+1]], 0
   ; CHECK-NEXT: %[[#REG+3]] = extractvalue { i8, i8 } %[[#REG+1]], 1
@@ -68,7 +68,7 @@ define {i1, i7} @call_uninstrumented({i32, i1} %a, [2 x i7] %b) {
 define {i1, i7} @call_custom_with_ret({i32, i1} %a, [2 x i7] %b) {
   ; CHECK: @call_custom_with_ret.dfsan
   ; CHECK: %labelreturn = alloca i8, align 1
-  ; CHECK: [[B:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]]
+  ; CHECK: [[B:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]]
   ; CHECK: [[A:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]]
   ; CHECK: [[A0:%.*]] = extractvalue { i8, i8 } [[A]], 0
   ; CHECK: [[A1:%.*]] = extractvalue { i8, i8 } [[A]], 1
@@ -89,7 +89,7 @@ define {i1, i7} @call_custom_with_ret({i32, i1} %a, [2 x i7] %b) {
 
 define void @call_custom_without_ret({i32, i1} %a, [2 x i7] %b) {
   ; CHECK: @call_custom_without_ret.dfsan
-  ; CHECK: [[B:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]]
+  ; CHECK: [[B:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]]
   ; CHECK: [[A:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]]
   ; CHECK: [[A0:%.*]] = extractvalue { i8, i8 } [[A]], 0
   ; CHECK: [[A1:%.*]] = extractvalue { i8, i8 } [[A]], 1
@@ -105,7 +105,7 @@ define void @call_custom_without_ret({i32, i1} %a, [2 x i7] %b) {
 
 define void @call_custom_varg({i32, i1} %a, [2 x i7] %b) {
   ; CHECK: @call_custom_varg.dfsan
-  ; CHECK: [[B:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]]
+  ; CHECK: [[B:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]]
   ; CHECK: %labelva = alloca [1 x i8], align 1
   ; CHECK: [[A:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]]
   ; CHECK: [[A0:%.*]] = extractvalue { i8, i8 } [[A]], 0
@@ -126,7 +126,7 @@ define void @call_custom_varg({i32, i1} %a, [2 x i7] %b) {
 define {i1, i7} @call_custom_cb({i32, i1} %a, [2 x i7] %b) {
   ; CHECK: define { i1, i7 } @call_custom_cb.dfsan({ i32, i1 } %a, [2 x i7] %b) {
   ; CHECK: %labelreturn = alloca i8, align 1
-  ; CHECK: [[B:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]]
+  ; CHECK: [[B:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]]
   ; CHECK: [[A:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]]
   ; CHECK: [[A0:%.*]] = extractvalue { i8, i8 } [[A]], 0
   ; CHECK: [[A1:%.*]] = extractvalue { i8, i8 } [[A]], 1
@@ -153,7 +153,7 @@ define {i1, i7} @custom_cb(ptr %cb, {i32, i1} %a, [2 x i7] %b) {
 
 define {i1, i7} @cb({i32, i1} %a, [2 x i7] %b) {
   ; CHECK: define { i1, i7 } @cb.dfsan({ i32, i1 } %a, [2 x i7] %b)
-  ; CHECK: [[BL:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]]
+  ; CHECK: [[BL:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]]
   ; CHECK: [[AL:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]]
   ; CHECK: [[AL1:%.*]] = extractvalue { i8, i8 } [[AL]], 1
   ; CHECK: [[BL0:%.*]] = extractvalue [2 x i8] [[BL]], 0
@@ -180,8 +180,8 @@ define ptr @ret_custom() {
 ; COMM: TODO simplify the expression [[#mul(2,SBYTES) + max(SBYTES,2)]] to
 ; COMM: [[#mul(3,SBYTES)]], if shadow-tls-alignment is updated to match shadow
 ; COMM: width bytes.
-; CHECK: [[B:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]]
-; CHECK: [[A:%.*]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
+; CHECK: [[B:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align [[ALIGN:2]]
+; CHECK: [[A:%.*]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]]
 ; CHECK: [[CB:%.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]]
 ; CHECK: [[A0:%.*]] = extractvalue { i8, i8 } [[A]], 0
 ; CHECK: [[A1:%.*]] = extractvalue { i8, i8 } [[A]], 1
@@ -198,7 +198,7 @@ define ptr @ret_custom() {
 define {i1, i7} @custom_with_ret({i32, i1} %a, [2 x i7] %b) {
   ; CHECK: define linkonce_odr { i1, i7 } @"dfsw$custom_with_ret"({ i32, i1 } %0, [2 x i7] %1)
   ; CHECK: %labelreturn = alloca i8, align 1
-  ; CHECK: [[B:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]]
+  ; CHECK: [[B:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]]
   ; CHECK: [[A:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]]
   ; CHECK: [[A0:%.*]] = extractvalue { i8, i8 } [[A]], 0
   ; CHECK: [[A1:%.*]] = extractvalue { i8, i8 } [[A]], 1
@@ -221,7 +221,7 @@ define {i1, i7} @custom_with_ret({i32, i1} %a, [2 x i7] %b) {
 
 define void @custom_without_ret({i32, i1} %a, [2 x i7] %b) {
   ; CHECK: define linkonce_odr void @"dfsw$custom_without_ret"({ i32, i1 } %0, [2 x i7] %1)
-  ; CHECK: [[B:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]]
+  ; CHECK: [[B:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]]
   ; CHECK: [[A:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]]
   ; CHECK: [[A0:%.*]] = extractvalue { i8, i8 } [[A]], 0
   ; CHECK: [[A1:%.*]] = extractvalue { i8, i8 } [[A]], 1
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/arith.ll b/llvm/test/Instrumentation/DataFlowSanitizer/arith.ll
index 8c9eb5f..b474383 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/arith.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/arith.ll
@@ -1,73 +1,86 @@
-; RUN: opt < %s -passes=dfsan -S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=dfsan -dfsan-add-global-name-suffix=0 -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 define i8 @add(i8 %a, i8 %b) {
-  ; CHECK: @add.dfsan
-  ; CHECK-DAG: %[[#ALABEL:]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN:2]]
-  ; CHECK-DAG: %[[#BLABEL:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
-  ; CHECK: %[[#UNION:]] = or i8 %[[#ALABEL]], %[[#BLABEL]]
-  ; CHECK: %c = add i8 %a, %b
-  ; CHECK: store i8 %[[#UNION]], ptr @__dfsan_retval_tls, align [[ALIGN]]
-  ; CHECK: ret i8 %c
+; CHECK-LABEL: define i8 @add(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = or i8 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[C:%.*]] = add i8 [[A]], [[B]]
+; CHECK-NEXT:    store i8 [[TMP3]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    ret i8 [[C]]
+;
   %c = add i8 %a, %b
   ret i8 %c
 }
 
 define i8 @sub(i8 %a, i8 %b) {
-  ; CHECK: @sub.dfsan
-  ; CHECK: load{{.*}}__dfsan_arg_tls
-  ; CHECK: load{{.*}}__dfsan_arg_tls
-  ; CHECK: or i8
-  ; CHECK: %c = sub i8 %a, %b
-  ; CHECK: store{{.*}}__dfsan_retval_tls
-  ; CHECK: ret i8 %c
+; CHECK-LABEL: define i8 @sub(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = or i8 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[C:%.*]] = sub i8 [[A]], [[B]]
+; CHECK-NEXT:    store i8 [[TMP3]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    ret i8 [[C]]
+;
   %c = sub i8 %a, %b
   ret i8 %c
 }
 
 define i8 @mul(i8 %a, i8 %b) {
-  ; CHECK: @mul.dfsan
-  ; CHECK: load{{.*}}__dfsan_arg_tls
-  ; CHECK: load{{.*}}__dfsan_arg_tls
-  ; CHECK: or i8
-  ; CHECK: %c = mul i8 %a, %b
-  ; CHECK: store{{.*}}__dfsan_retval_tls
-  ; CHECK: ret i8 %c
+; CHECK-LABEL: define i8 @mul(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = or i8 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[C:%.*]] = mul i8 [[A]], [[B]]
+; CHECK-NEXT:    store i8 [[TMP3]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    ret i8 [[C]]
+;
   %c = mul i8 %a, %b
   ret i8 %c
 }
 
 define i8 @sdiv(i8 %a, i8 %b) {
-  ; CHECK: @sdiv.dfsan
-  ; CHECK: load{{.*}}__dfsan_arg_tls
-  ; CHECK: load{{.*}}__dfsan_arg_tls
-  ; CHECK: or i8
-  ; CHECK: %c = sdiv i8 %a, %b
-  ; CHECK: store{{.*}}__dfsan_retval_tls
-  ; CHECK: ret i8 %c
+; CHECK-LABEL: define i8 @sdiv(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = or i8 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[C:%.*]] = sdiv i8 [[A]], [[B]]
+; CHECK-NEXT:    store i8 [[TMP3]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    ret i8 [[C]]
+;
   %c = sdiv i8 %a, %b
   ret i8 %c
 }
 
 define i8 @udiv(i8 %a, i8 %b) {
-  ; CHECK: @udiv.dfsan
-  ; CHECK: load{{.*}}__dfsan_arg_tls
-  ; CHECK: load{{.*}}__dfsan_arg_tls
-  ; CHECK: or i8
-  ; CHECK: %c = udiv i8 %a, %b
-  ; CHECK: store{{.*}}__dfsan_retval_tls
-  ; CHECK: ret i8 %c
+; CHECK-LABEL: define i8 @udiv(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = or i8 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[C:%.*]] = udiv i8 [[A]], [[B]]
+; CHECK-NEXT:    store i8 [[TMP3]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    ret i8 [[C]]
+;
   %c = udiv i8 %a, %b
   ret i8 %c
 }
 
 define double @fneg(double %a) {
-  ; CHECK: @fneg.dfsan
-  ; CHECK: load{{.*}}__dfsan_arg_tls
-  ; CHECK: %c = fneg double %a
-  ; CHECK: store{{.*}}__dfsan_retval_tls
-  ; CHECK: ret double %c
+; CHECK-LABEL: define double @fneg(
+; CHECK-SAME: double [[A:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[C:%.*]] = fneg double [[A]]
+; CHECK-NEXT:    store i8 [[TMP1]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    ret double [[C]]
+;
   %c = fneg double %a
   ret double %c
 }
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/array.ll b/llvm/test/Instrumentation/DataFlowSanitizer/array.ll
index 5642edc..14468c1 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/array.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/array.ll
@@ -158,7 +158,7 @@ define i1 @extract_array([4 x i1] %a) {
 define [4 x i1] @insert_array([4 x i1] %a, i1 %e2) {
   ; NO_COMBINE_LOAD_PTR: @insert_array.dfsan
   ; NO_COMBINE_LOAD_PTR: [[EM:%.*]] = load i8, ptr
-  ; NO_COMBINE_LOAD_PTR-SAME: inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]]
+  ; NO_COMBINE_LOAD_PTR-SAME: getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align [[ALIGN:2]]
   ; NO_COMBINE_LOAD_PTR: [[AM:%.*]] = load [4 x i8], ptr @__dfsan_arg_tls, align [[ALIGN]]
   ; NO_COMBINE_LOAD_PTR: [[AM1:%.*]] = insertvalue [4 x i8] [[AM]], i8 [[EM]], 0
   ; NO_COMBINE_LOAD_PTR: store [4 x i8] [[AM1]], ptr @__dfsan_retval_tls, align [[ALIGN]]
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/dfsan-pass-second-run.ll b/llvm/test/Instrumentation/DataFlowSanitizer/dfsan-pass-second-run.ll
index 7da647b..7f49c14 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/dfsan-pass-second-run.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/dfsan-pass-second-run.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-unknown-linux-gnu"
 define i8 @add(i8 %a, i8 %b) {
   ; CHECK: @add.dfsan
   ; CHECK-DAG: %[[#ALABEL:]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN:2]]
-  ; CHECK-DAG: %[[#BLABEL:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
+  ; CHECK-DAG: %[[#BLABEL:]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]]
   ; CHECK: %[[#UNION:]] = or i8 %[[#ALABEL]], %[[#BLABEL]]
   ; CHECK: %c = add i8 %a, %b
   ; CHECK: store i8 %[[#UNION]], ptr @__dfsan_retval_tls, align [[ALIGN]]
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/dont_combine_offset_labels_on_gep.ll b/llvm/test/Instrumentation/DataFlowSanitizer/dont_combine_offset_labels_on_gep.ll
index 997681b..7574346 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/dont_combine_offset_labels_on_gep.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/dont_combine_offset_labels_on_gep.ll
@@ -1,19 +1,26 @@
-; RUN: opt < %s -passes=dfsan -dfsan-combine-offset-labels-on-gep=false -S | FileCheck %s
-; RUN: opt < %s -passes=dfsan -dfsan-combine-offset-labels-on-gep=false -dfsan-track-origins=1 -S | FileCheck %s --check-prefixes=CHECK,CHECK_ORIGIN
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=dfsan -dfsan-combine-offset-labels-on-gep=false -dfsan-add-global-name-suffix=0 -S | FileCheck %s
+; RUN: opt < %s -passes=dfsan -dfsan-combine-offset-labels-on-gep=false -dfsan-track-origins=1 -dfsan-add-global-name-suffix=0 -S | FileCheck %s --check-prefix=CHECK_ORIGIN
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [[TLS_ARR:\[100 x i64\]]]
-; CHECK: @__dfsan_retval_tls = external thread_local(initialexec) global [[TLS_ARR]]
 define ptr @gepop(ptr %p, i32 %a, i32 %b, i32 %c) {
-  ; CHECK: @gepop.dfsan
-  ; CHECK_ORIGIN: %[[#PO:]] = load i32, ptr @__dfsan_arg_origin_tls, align [[ALIGN_O:4]]
-  ; CHECK: %[[#PS:]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN_S:2]]
-  ; CHECK: %e = getelementptr [10 x [20 x i32]], ptr %p, i32 %a, i32 %b, i32 %c
-  ; CHECK: store i8 %[[#PS]], ptr @__dfsan_retval_tls, align [[ALIGN_S]]
-  ; CHECK_ORIGIN: store i32 %[[#PO]], ptr @__dfsan_retval_origin_tls, align [[ALIGN_O]]
-
+; CHECK-LABEL: define ptr @gepop(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[E:%.*]] = getelementptr [10 x [20 x i32]], ptr [[P]], i32 [[A]], i32 [[B]], i32 [[C]]
+; CHECK-NEXT:    store i8 [[TMP1]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    ret ptr [[E]]
+;
+; CHECK_ORIGIN-LABEL: define ptr @gepop(
+; CHECK_ORIGIN-SAME: ptr [[P:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK_ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
+; CHECK_ORIGIN-NEXT:    [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK_ORIGIN-NEXT:    [[E:%.*]] = getelementptr [10 x [20 x i32]], ptr [[P]], i32 [[A]], i32 [[B]], i32 [[C]]
+; CHECK_ORIGIN-NEXT:    store i8 [[TMP2]], ptr @__dfsan_retval_tls, align 2
+; CHECK_ORIGIN-NEXT:    store i32 [[TMP1]], ptr @__dfsan_retval_origin_tls, align 4
+; CHECK_ORIGIN-NEXT:    ret ptr [[E]]
+;
   %e = getelementptr [10 x [20 x i32]], ptr %p, i32 %a, i32 %b, i32 %c
   ret ptr %e
 }
-
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_abilist.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_abilist.ll
index 031fd1c..fbcdb3d 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_abilist.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_abilist.ll
@@ -114,7 +114,7 @@ define void @call_custom_without_ret(i32 %a, i32 %b) {
   ; CHECK: @call_custom_without_ret.dfsan
   ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
   ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
-  ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
+  ; CHECK: [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
   ; CHECK: [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
   ; CHECK: call void @__dfso_custom_without_ret(i32 %a, i32 %b, i8 zeroext [[AS]], i8 zeroext [[BS]], i32 zeroext [[AO]], i32 zeroext [[BO]])
   ; CHECK-NEXT: ret void
@@ -129,7 +129,7 @@ define i32 @call_custom_with_ret(i32 %a, i32 %b) {
   ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
   ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
   ; CHECK: %labelreturn = alloca i8, align 1
-  ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
+  ; CHECK: [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
   ; CHECK: [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
   ; CHECK: {{.*}} = call i32 @__dfso_custom_with_ret(i32 %a, i32 %b, i8 zeroext [[AS]], i8 zeroext [[BS]], ptr %labelreturn, i32 zeroext [[AO]], i32 zeroext [[BO]], ptr %originreturn)
   ; CHECK: [[RS:%.*]] = load i8, ptr %labelreturn, align 1
@@ -147,7 +147,7 @@ define void @call_custom_varg_without_ret(i32 %a, i32 %b) {
   ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
   ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
   ; CHECK: %labelva = alloca [1 x i8], align 1
-  ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
+  ; CHECK: [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
   ; CHECK: [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
   ; CHECK: [[VS0:%.*]] = getelementptr inbounds nuw [1 x i8], ptr %labelva, i32 0, i32 0
   ; CHECK: store i8 [[AS]], ptr [[VS0]], align 1
@@ -170,7 +170,7 @@ define i32 @call_custom_varg_with_ret(i32 %a, i32 %b) {
   ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls
   ; CHECK: %labelreturn = alloca i8, align 1
   ; CHECK: %labelva = alloca [1 x i8], align 1
-  ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
+  ; CHECK: [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
   ; CHECK: [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
   ; CHECK: [[VS0:%.*]] = getelementptr inbounds nuw [1 x i8], ptr %labelva, i32 0, i32 0
   ; CHECK: store i8 [[BS]], ptr [[VS0]], align 1
@@ -194,7 +194,7 @@ define i32 @call_custom_cb_with_ret(i32 %a, i32 %b) {
   ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
   ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
   ; CHECK: %labelreturn = alloca i8, align 1
-  ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
+  ; CHECK: [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
   ; CHECK: [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
   ; CHECK: {{.*}} = call i32 @__dfso_custom_cb_with_ret(ptr @cb_with_ret.dfsan, i32 %a, i32 %b, i8 zeroext 0, i8 zeroext [[AS]], i8 zeroext [[BS]], ptr %labelreturn, i32 zeroext 0, i32 zeroext [[AO]], i32 zeroext [[BO]], ptr %originreturn)
   ; CHECK: [[RS:%.*]] = load i8, ptr %labelreturn, align 1
@@ -210,7 +210,7 @@ define void @call_custom_cb_without_ret(i32 %a, i32 %b) {
   ; CHECK-LABEL: @call_custom_cb_without_ret.dfsan
   ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
   ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
-  ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
+  ; CHECK: [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
   ; CHECK: [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
   ; CHECK: call void @__dfso_custom_cb_without_ret(ptr @cb_without_ret.dfsan, i32 %a, i32 %b, i8 zeroext 0, i8 zeroext [[AS]], i8 zeroext [[BS]], i32 zeroext 0, i32 zeroext [[AO]], i32 zeroext [[BO]])
   ; CHECK-NEXT: ret void
@@ -228,7 +228,7 @@ define void @call_custom_cb_without_ret(i32 %a, i32 %b) {
 ; CHECK: define linkonce_odr void @"dfso$custom_without_ret"(i32 %0, i32 %1)
 ; CHECK:  [[BO:%.*]]  = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
 ; CHECK-NEXT:  [[AO:%.*]]  = load i32, ptr @__dfsan_arg_origin_tls, align 4
-; CHECK-NEXT:  [[BS:%.*]]  = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
+; CHECK-NEXT:  [[BS:%.*]]  = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
 ; CHECK-NEXT:  [[AS:%.*]]  = load i8, ptr @__dfsan_arg_tls, align 2
 ; CHECK-NEXT:  call void @__dfso_custom_without_ret(i32 %0, i32 %1, i8 zeroext [[AS]], i8 zeroext [[BS]], i32 zeroext [[AO]], i32 zeroext [[BO]])
 ; CHECK-NEXT:  ret void
@@ -238,7 +238,7 @@ define void @call_custom_cb_without_ret(i32 %a, i32 %b) {
 ; CHECK-NEXT:  [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
 ; CHECK-NEXT:  [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
 ; CHECK-NEXT:  %labelreturn = alloca i8, align 1
-; CHECK-NEXT:  [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
+; CHECK-NEXT:  [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
 ; CHECK-NEXT:  [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
 ; CHECK-NEXT:  [[R:%.*]] = call i32 @__dfso_custom_with_ret(i32 %0, i32 %1, i8 zeroext [[AS]], i8 zeroext [[BS]], ptr %labelreturn, i32 zeroext [[AO]], i32 zeroext [[BO]], ptr %originreturn)
 ; CHECK-NEXT:  [[RS:%.*]] = load i8, ptr %labelreturn, align 1
@@ -261,8 +261,8 @@ define void @call_custom_cb_without_ret(i32 %a, i32 %b) {
 ; CHECK-NEXT:  [[AO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
 ; CHECK-NEXT:  [[CO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
 ; CHECK-NEXT:  %labelreturn = alloca i8, align 1
-; CHECK-NEXT:  [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align 2
-; CHECK-NEXT:  [[AS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
+; CHECK-NEXT:  [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2
+; CHECK-NEXT:  [[AS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
 ; CHECK-NEXT:  [[CS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
 ; CHECK-NEXT:  [[R:%.*]] = call i32 @__dfso_custom_cb_with_ret(ptr %0, i32 %1, i32 %2, i8 zeroext [[CS]], i8 zeroext [[AS]], i8 zeroext [[BS]], ptr %labelreturn, i32 zeroext [[CO]], i32 zeroext [[AO]], i32 zeroext [[BO]], ptr %originreturn)
 ; CHECK-NEXT:  [[RS:%.*]] = load i8, ptr %labelreturn, align 1
@@ -275,8 +275,8 @@ define void @call_custom_cb_without_ret(i32 %a, i32 %b) {
 ; CHECK:   [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 2), align 4
 ; CHECK-NEXT:  [[AO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
 ; CHECK-NEXT:  [[CO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
-; CHECK-NEXT:  [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align 2
-; CHECK-NEXT:  [[AS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
+; CHECK-NEXT:  [[BS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2
+; CHECK-NEXT:  [[AS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
 ; CHECK-NEXT:  [[CS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
 ; CHECK-NEXT:  call void @__dfso_custom_cb_without_ret(ptr %0, i32 %1, i32 %2, i8 zeroext [[CS]], i8 zeroext [[AS]], i8 zeroext [[BS]], i32 zeroext [[CO]], i32 zeroext [[AO]], i32 zeroext [[BO]])
 ; CHECK-NEXT:  ret void
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_cached_shadows.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_cached_shadows.ll
index cb9a306e..194a193 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_cached_shadows.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_cached_shadows.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1  -S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1 -dfsan-add-global-name-suffix=0 -S | FileCheck %s
 ;
 ; %i13 and %i15 have the same key in shadow cache. They should not reuse the same
 ; shadow because their blocks do not dominate each other. Origin tracking
@@ -7,43 +8,129 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [[TLS_ARR:\[100 x i64\]]]
 define void @cached_shadows(double %arg) {
-  ; CHECK: @cached_shadows.dfsan
-  ; CHECK:  [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align
-  ; CHECK:  [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN:2]]
-  ; CHECK: [[L1:.+]]:
-  ; CHECK:  {{.*}} = phi i8
-  ; CHECK:  {{.*}} = phi i32
-  ; CHECK:  {{.*}} = phi double [ 3.000000e+00
-  ; CHECK:  [[S_L1:%.*]] = phi i8 [ 0, %[[L0:.*]] ], [ [[S_L7:%.*]], %[[L7:.*]] ]
-  ; CHECK:  [[O_L1:%.*]] = phi i32 [ 0, %[[L0]] ], [ [[O_L7:%.*]], %[[L7]] ]
-  ; CHECK:  [[V_L1:%.*]] = phi double [ 4.000000e+00, %[[L0]] ], [ [[V_L7:%.*]], %[[L7]] ]
-  ; CHECK:  br i1 {{%.+}}, label %[[L2:.*]], label %[[L4:.*]]
-  ; CHECK: [[L2]]:
-  ; CHECK:  br i1 {{%.+}}, label %[[L3:.+]], label %[[L7]]
-  ; CHECK: [[L3]]:
-  ; CHECK:  [[S_L3:%.*]] = or i8
-  ; CHECK:  [[AS_NE_L3:%.*]] = icmp ne i8 [[AS]], 0
-  ; CHECK:  [[O_L3:%.*]] = select i1 [[AS_NE_L3]], i32 %{{[0-9]+}}, i32 [[O_L1]]
-  ; CHECK:  [[V_L3:%.*]] = fsub double [[V_L1]], %{{.+}}
-  ; CHECK:  br label %[[L7]]
-  ; CHECK: [[L4]]:
-  ; CHECK:  br i1 %_dfscmp, label %[[L5:.+]], label %[[L6:.+]],
-  ; CHECK: [[L5]]:
-  ; CHECK:  br label %[[L6]]
-  ; CHECK: [[L6]]:
-  ; CHECK:  [[S_L6:%.*]] = or i8
-  ; CHECK:  [[AS_NE_L6:%.*]] = icmp ne i8 [[AS]], 0
-  ; CHECK:  [[O_L6:%.*]] = select i1 [[AS_NE_L6]], i32 [[AO]], i32 [[O_L1]]
-  ; CHECK:  [[V_L6:%.*]] = fadd double [[V_L1]], %{{.+}}
-  ; CHECK:  br label %[[L7]]
-  ; CHECK: [[L7]]:
-  ; CHECK:  [[S_L7]] = phi i8 [ [[S_L3]], %[[L3]] ], [ [[S_L1]], %[[L2]] ], [ [[S_L6]], %[[L6]] ]
-  ; CHECK:  [[O_L7]] = phi i32 [ [[O_L3]], %[[L3]] ], [ [[O_L1]], %[[L2]] ], [ [[O_L6]], %[[L6]] ]
-  ; CHECK:  [[V_L7]] = phi double [ [[V_L3]], %[[L3]] ], [ [[V_L1]], %[[L2]] ], [ [[V_L6]], %[[L6]] ]
-  ; CHECK:  br i1 %{{.+}}, label %[[L1]], label %[[L8:.+]]
-  ; CHECK: [[L8]]:
+; CHECK-LABEL: define void @cached_shadows(
+; CHECK-SAME: double [[ARG:%.*]]) {
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[I:%.*]] = alloca double, align 8
+; CHECK-NEXT:    [[I1:%.*]] = alloca double, align 8
+; CHECK-NEXT:    [[I2:%.*]] = bitcast ptr [[I]] to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[I]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080
+; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    store i64 0, ptr [[TMP4]], align 1
+; CHECK-NEXT:    store volatile double 1.000000e+00, ptr [[I]], align 8
+; CHECK-NEXT:    [[I3:%.*]] = bitcast ptr [[I1]] to ptr
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[I1]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    store i64 0, ptr [[TMP7]], align 1
+; CHECK-NEXT:    store volatile double 2.000000e+00, ptr [[I1]], align 8
+; CHECK-NEXT:    br label %[[BB4:.*]]
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi i8 [ 0, %[[BB]] ], [ [[TMP76:%.*]], %[[BB16:.*]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP77:%.*]], %[[BB16]] ]
+; CHECK-NEXT:    [[I5:%.*]] = phi double [ 3.000000e+00, %[[BB]] ], [ [[I17:%.*]], %[[BB16]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = phi i8 [ 0, %[[BB]] ], [ [[TMP78:%.*]], %[[BB16]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP79:%.*]], %[[BB16]] ]
+; CHECK-NEXT:    [[I6:%.*]] = phi double [ 4.000000e+00, %[[BB]] ], [ [[I18:%.*]], %[[BB16]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[I1]] to i64
+; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080
+; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP13]], 17592186044416
+; CHECK-NEXT:    [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr
+; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 8
+; CHECK-NEXT:    [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 1
+; CHECK-NEXT:    [[TMP19:%.*]] = shl i64 [[TMP18]], 32
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i32, ptr [[TMP16]], i64 1
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 8
+; CHECK-NEXT:    [[TMP22:%.*]] = lshr i64 [[TMP18]], 32
+; CHECK-NEXT:    [[TMP23:%.*]] = or i64 [[TMP18]], [[TMP22]]
+; CHECK-NEXT:    [[TMP24:%.*]] = lshr i64 [[TMP23]], 16
+; CHECK-NEXT:    [[TMP25:%.*]] = or i64 [[TMP23]], [[TMP24]]
+; CHECK-NEXT:    [[TMP26:%.*]] = lshr i64 [[TMP25]], 8
+; CHECK-NEXT:    [[TMP27:%.*]] = or i64 [[TMP25]], [[TMP26]]
+; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[TMP27]] to i8
+; CHECK-NEXT:    [[TMP29:%.*]] = icmp ne i64 [[TMP19]], 0
+; CHECK-NEXT:    [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP17]], i32 [[TMP21]]
+; CHECK-NEXT:    [[I7:%.*]] = load volatile double, ptr [[I1]], align 8
+; CHECK-NEXT:    [[I8:%.*]] = fcmp une double [[I7]], 0.000000e+00
+; CHECK-NEXT:    [[TMP31:%.*]] = ptrtoint ptr [[I1]] to i64
+; CHECK-NEXT:    [[TMP32:%.*]] = xor i64 [[TMP31]], 87960930222080
+; CHECK-NEXT:    [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr
+; CHECK-NEXT:    [[TMP34:%.*]] = add i64 [[TMP32]], 17592186044416
+; CHECK-NEXT:    [[TMP35:%.*]] = inttoptr i64 [[TMP34]] to ptr
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 8
+; CHECK-NEXT:    [[TMP37:%.*]] = load i64, ptr [[TMP33]], align 1
+; CHECK-NEXT:    [[TMP38:%.*]] = shl i64 [[TMP37]], 32
+; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr i32, ptr [[TMP35]], i64 1
+; CHECK-NEXT:    [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 8
+; CHECK-NEXT:    [[TMP41:%.*]] = lshr i64 [[TMP37]], 32
+; CHECK-NEXT:    [[TMP42:%.*]] = or i64 [[TMP37]], [[TMP41]]
+; CHECK-NEXT:    [[TMP43:%.*]] = lshr i64 [[TMP42]], 16
+; CHECK-NEXT:    [[TMP44:%.*]] = or i64 [[TMP42]], [[TMP43]]
+; CHECK-NEXT:    [[TMP45:%.*]] = lshr i64 [[TMP44]], 8
+; CHECK-NEXT:    [[TMP46:%.*]] = or i64 [[TMP44]], [[TMP45]]
+; CHECK-NEXT:    [[TMP47:%.*]] = trunc i64 [[TMP46]] to i8
+; CHECK-NEXT:    [[TMP48:%.*]] = icmp ne i64 [[TMP38]], 0
+; CHECK-NEXT:    [[TMP49:%.*]] = select i1 [[TMP48]], i32 [[TMP36]], i32 [[TMP40]]
+; CHECK-NEXT:    [[I9:%.*]] = load volatile double, ptr [[I1]], align 8
+; CHECK-NEXT:    br i1 [[I8]], label %[[BB10:.*]], label %[[BB14:.*]]
+; CHECK:       [[BB10]]:
+; CHECK-NEXT:    [[I11:%.*]] = fcmp une double [[I9]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[I11]], label %[[BB12:.*]], label %[[BB16]]
+; CHECK:       [[BB12]]:
+; CHECK-NEXT:    [[TMP50:%.*]] = or i8 [[TMP10]], [[TMP1]]
+; CHECK-NEXT:    [[TMP51:%.*]] = icmp ne i8 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP0]], i32 [[TMP11]]
+; CHECK-NEXT:    [[I13:%.*]] = fsub double [[I6]], [[ARG]]
+; CHECK-NEXT:    br label %[[BB16]]
+; CHECK:       [[BB14]]:
+; CHECK-NEXT:    [[TMP53:%.*]] = ptrtoint ptr [[I]] to i64
+; CHECK-NEXT:    [[TMP54:%.*]] = xor i64 [[TMP53]], 87960930222080
+; CHECK-NEXT:    [[TMP55:%.*]] = inttoptr i64 [[TMP54]] to ptr
+; CHECK-NEXT:    [[TMP56:%.*]] = add i64 [[TMP54]], 17592186044416
+; CHECK-NEXT:    [[TMP57:%.*]] = inttoptr i64 [[TMP56]] to ptr
+; CHECK-NEXT:    [[TMP58:%.*]] = insertelement <8 x i8> poison, i8 [[TMP47]], i32 0
+; CHECK-NEXT:    [[TMP59:%.*]] = insertelement <8 x i8> [[TMP58]], i8 [[TMP47]], i32 1
+; CHECK-NEXT:    [[TMP60:%.*]] = insertelement <8 x i8> [[TMP59]], i8 [[TMP47]], i32 2
+; CHECK-NEXT:    [[TMP61:%.*]] = insertelement <8 x i8> [[TMP60]], i8 [[TMP47]], i32 3
+; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <8 x i8> [[TMP61]], i8 [[TMP47]], i32 4
+; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <8 x i8> [[TMP62]], i8 [[TMP47]], i32 5
+; CHECK-NEXT:    [[TMP64:%.*]] = insertelement <8 x i8> [[TMP63]], i8 [[TMP47]], i32 6
+; CHECK-NEXT:    [[TMP65:%.*]] = insertelement <8 x i8> [[TMP64]], i8 [[TMP47]], i32 7
+; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr <8 x i8>, ptr [[TMP55]], i32 0
+; CHECK-NEXT:    store <8 x i8> [[TMP65]], ptr [[TMP66]], align 1
+; CHECK-NEXT:    [[_DFSCMP:%.*]] = icmp ne i8 [[TMP47]], 0
+; CHECK-NEXT:    br i1 [[_DFSCMP]], label %[[BB67:.*]], label %[[BB72:.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       [[BB67]]:
+; CHECK-NEXT:    [[TMP68:%.*]] = call i32 @__dfsan_chain_origin(i32 [[TMP49]])
+; CHECK-NEXT:    [[TMP69:%.*]] = zext i32 [[TMP68]] to i64
+; CHECK-NEXT:    [[TMP70:%.*]] = shl i64 [[TMP69]], 32
+; CHECK-NEXT:    [[TMP71:%.*]] = or i64 [[TMP69]], [[TMP70]]
+; CHECK-NEXT:    store i64 [[TMP71]], ptr [[TMP57]], align 8
+; CHECK-NEXT:    br label %[[BB72]]
+; CHECK:       [[BB72]]:
+; CHECK-NEXT:    store volatile double [[I9]], ptr [[I]], align 8
+; CHECK-NEXT:    [[TMP73:%.*]] = or i8 [[TMP10]], [[TMP1]]
+; CHECK-NEXT:    [[TMP74:%.*]] = icmp ne i8 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[TMP0]], i32 [[TMP11]]
+; CHECK-NEXT:    [[I15:%.*]] = fadd double [[I6]], [[ARG]]
+; CHECK-NEXT:    br label %[[BB16]]
+; CHECK:       [[BB16]]:
+; CHECK-NEXT:    [[TMP76]] = phi i8 [ [[TMP10]], %[[BB12]] ], [ [[TMP8]], %[[BB10]] ], [ [[TMP10]], %[[BB72]] ]
+; CHECK-NEXT:    [[TMP77]] = phi i32 [ [[TMP11]], %[[BB12]] ], [ [[TMP9]], %[[BB10]] ], [ [[TMP11]], %[[BB72]] ]
+; CHECK-NEXT:    [[I17]] = phi double [ [[I6]], %[[BB12]] ], [ [[I5]], %[[BB10]] ], [ [[I6]], %[[BB72]] ]
+; CHECK-NEXT:    [[TMP78]] = phi i8 [ [[TMP50]], %[[BB12]] ], [ [[TMP10]], %[[BB10]] ], [ [[TMP73]], %[[BB72]] ]
+; CHECK-NEXT:    [[TMP79]] = phi i32 [ [[TMP52]], %[[BB12]] ], [ [[TMP11]], %[[BB10]] ], [ [[TMP75]], %[[BB72]] ]
+; CHECK-NEXT:    [[I18]] = phi double [ [[I13]], %[[BB12]] ], [ [[I6]], %[[BB10]] ], [ [[I15]], %[[BB72]] ]
+; CHECK-NEXT:    [[I19:%.*]] = fcmp olt double [[I17]], 9.900000e+01
+; CHECK-NEXT:    br i1 [[I19]], label %[[BB4]], label %[[BB20:.*]]
+; CHECK:       [[BB20]]:
+; CHECK-NEXT:    ret void
+;
 bb:
   %i = alloca double, align 8
   %i1 = alloca double, align 8
@@ -83,3 +170,6 @@ bb16:                                             ; preds = %bb14, %bb12, %bb10
 bb20:                                             ; preds = %bb16
   ret void
 }
+;.
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_call.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_call.ll
index 5ee9927..9e8d015 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_call.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_call.ll
@@ -37,8 +37,8 @@ i1 %a200
 define i1 @param_overflow(i1 %a) {
   ; CHECK: @param_overflow.dfsan
   ; CHECK: store i32 %1, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 199), align 4
-  ; CHECK-NEXT: store i8 %2, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 398) to ptr), align 2
-  ; CHECK-NEXT: store i8 %2, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 400) to ptr), align 2
+  ; CHECK-NEXT: store i8 %2, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 398), align 2
+  ; CHECK-NEXT: store i8 %2, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 400), align 2
   ; CHECK-NEXT: %r = call i1 @arg_overflow.dfsan
   ; CHECK: %_dfsret_o = load i32, ptr @__dfsan_retval_origin_tls, align 4
   ; CHECK: store i32 %_dfsret_o, ptr @__dfsan_retval_origin_tls, align 4
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll
index 0c84c79..a0c642a 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_load.ll
@@ -93,7 +93,7 @@ define i16 @load16(i1 %i, ptr %p) {
   ; CHECK-LABEL: @load16.dfsan
 
   ; COMBINE_LOAD_PTR-NEXT: %[[#PO:]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; COMBINE_LOAD_PTR-NEXT: %[[#PS:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
+  ; COMBINE_LOAD_PTR-NEXT: %[[#PS:]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]]
 
   ; CHECK-NEXT:            %[[#INTP:]] = ptrtoint ptr %p to i64
   ; CHECK-NEXT:            %[[#SHADOW_OFFSET:]] = xor i64 %[[#INTP]], [[#MASK]]
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_mem_intrinsic.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_mem_intrinsic.ll
index f8adb01..f4f3cb5 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_mem_intrinsic.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_mem_intrinsic.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1  -S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1 -dfsan-add-global-name-suffix=0 -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
@@ -7,32 +8,54 @@ declare void @llvm.memmove.p0.p0.i32(ptr, ptr, i32, i1)
 declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
 
 define void @memcpy(ptr %d, ptr %s, i32 %l) {
-  ; CHECK: @memcpy.dfsan
-  ; CHECK: [[L64:%.*]] = zext i32 %l to i64
-  ; CHECK: call void @__dfsan_mem_origin_transfer(ptr %d, ptr %s, i64 [[L64]])
-  ; CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 1 {{.*}}, ptr align 1 {{.*}}, i32 {{.*}}, i1 false)
-  ; CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %d, ptr %s, i32 %l, i1 false)
-
+; CHECK-LABEL: define void @memcpy(
+; CHECK-SAME: ptr [[D:%.*]], ptr [[S:%.*]], i32 [[L:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[L]] to i64
+; CHECK-NEXT:    call void @__dfsan_mem_origin_transfer(ptr [[D]], ptr [[S]], i64 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080
+; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[S]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[L]], 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP4]], ptr align 1 [[TMP7]], i32 [[TMP8]], i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr [[D]], ptr [[S]], i32 [[L]], i1 false)
+; CHECK-NEXT:    ret void
+;
   call void @llvm.memcpy.p0.p0.i32(ptr %d, ptr %s, i32 %l, i1 0)
   ret void
 }
 
 define void @memmove(ptr %d, ptr %s, i32 %l) {
-  ; CHECK: @memmove.dfsan
-  ; CHECK: [[L64:%.*]] = zext i32 %l to i64
-  ; CHECK: call void @__dfsan_mem_origin_transfer(ptr %d, ptr %s, i64 [[L64]])
-  ; CHECK: call void @llvm.memmove.p0.p0.i32(ptr align 1 {{.*}}, ptr align 1 {{.*}}, i32 {{.*}}, i1 false)
-  ; CHECK: call void @llvm.memmove.p0.p0.i32(ptr %d, ptr %s, i32 %l, i1 false)
-
+; CHECK-LABEL: define void @memmove(
+; CHECK-SAME: ptr [[D:%.*]], ptr [[S:%.*]], i32 [[L:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[L]] to i64
+; CHECK-NEXT:    call void @__dfsan_mem_origin_transfer(ptr [[D]], ptr [[S]], i64 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080
+; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[S]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[L]], 1
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i32(ptr align 1 [[TMP4]], ptr align 1 [[TMP7]], i32 [[TMP8]], i1 false)
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i32(ptr [[D]], ptr [[S]], i32 [[L]], i1 false)
+; CHECK-NEXT:    ret void
+;
   call void @llvm.memmove.p0.p0.i32(ptr %d, ptr %s, i32 %l, i1 0)
   ret void
 }
 
 define void @memset(ptr %p, i8 %v) {
-  ; CHECK: @memset.dfsan
-  ; CHECK: [[O:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; CHECK: [[S:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]]
-  ; CHECK: call void @__dfsan_set_label(i8 [[S]], i32 [[O]], ptr %p, i64 1)
+; CHECK-LABEL: define void @memset(
+; CHECK-SAME: ptr [[P:%.*]], i8 [[V:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; CHECK-NEXT:    call void @__dfsan_set_label(i8 [[TMP2]], i32 [[TMP1]], ptr [[P]], i64 1)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[P]], i8 [[V]], i64 1, i1 true)
+; CHECK-NEXT:    ret void
+;
   call void @llvm.memset.p0.i64(ptr %p, i8 %v, i64 1, i1 1)
   ret void
-}
-\ No newline at end of file
+}
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_other_ops.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_other_ops.ll
index 3b10204..f409143 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_other_ops.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_other_ops.ll
@@ -1,140 +1,200 @@
-; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1  -S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1 -dfsan-add-global-name-suffix=0 -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [[TLS_ARR:\[100 x i64\]]]
-; CHECK: @__dfsan_retval_tls = external thread_local(initialexec) global [[TLS_ARR]]
 define float @unop(float %f) {
-  ; CHECK: @unop.dfsan
-  ; CHECK: [[FO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
-  ; CHECK: store i32 [[FO]], ptr @__dfsan_retval_origin_tls, align 4
-
+; CHECK-LABEL: define float @unop(
+; CHECK-SAME: float [[F:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[R:%.*]] = fneg float [[F]]
+; CHECK-NEXT:    store i8 [[TMP2]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    store i32 [[TMP1]], ptr @__dfsan_retval_origin_tls, align 4
+; CHECK-NEXT:    ret float [[R]]
+;
   %r = fneg float %f
   ret float %r
 }
 
 define i1 @binop(i1 %a, i1 %b) {
-  ; CHECK: @binop.dfsan
-  ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
-  ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
-  ; CHECK: [[NE:%.*]] = icmp ne i8 [[BS]], 0
-  ; CHECK: [[MO:%.*]] = select i1 [[NE]], i32 [[BO]], i32 [[AO]]
-  ; CHECK: store i32 [[MO]], ptr @__dfsan_retval_origin_tls, align 4
-
+; CHECK-LABEL: define i1 @binop(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = or i8 [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT:    [[R:%.*]] = add i1 [[A]], [[B]]
+; CHECK-NEXT:    store i8 [[TMP5]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    store i32 [[TMP7]], ptr @__dfsan_retval_origin_tls, align 4
+; CHECK-NEXT:    ret i1 [[R]]
+;
   %r = add i1 %a, %b
   ret i1 %r
 }
 
 define i8 @castop(ptr %p) {
-  ; CHECK: @castop.dfsan
-  ; CHECK: [[PO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
-  ; CHECK: store i32 [[PO]], ptr @__dfsan_retval_origin_tls, align 4
-
+; CHECK-LABEL: define i8 @castop(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[R:%.*]] = ptrtoint ptr [[P]] to i8
+; CHECK-NEXT:    store i8 [[TMP2]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    store i32 [[TMP1]], ptr @__dfsan_retval_origin_tls, align 4
+; CHECK-NEXT:    ret i8 [[R]]
+;
   %r = ptrtoint ptr %p to i8
   ret i8 %r
 }
 
 define i1 @cmpop(i1 %a, i1 %b) {
-  ; CHECK: @cmpop.dfsan
-  ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
-  ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
-  ; CHECK: [[NE:%.*]] = icmp ne i8 [[BS]], 0
-  ; CHECK: [[MO:%.*]] = select i1 [[NE]], i32 [[BO]], i32 [[AO]]
-  ; CHECK: store i32 [[MO]], ptr @__dfsan_retval_origin_tls, align 4
-
+; CHECK-LABEL: define i1 @cmpop(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = or i8 [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i1 [[A]], [[B]]
+; CHECK-NEXT:    store i8 [[TMP5]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    store i32 [[TMP7]], ptr @__dfsan_retval_origin_tls, align 4
+; CHECK-NEXT:    ret i1 [[R]]
+;
   %r = icmp eq i1 %a, %b
   ret i1 %r
 }
 
 define ptr @gepop(ptr %p, i32 %a, i32 %b, i32 %c) {
-  ; CHECK: @gepop.dfsan
-  ; CHECK: [[CO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 3), align 4
-  ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 2), align 4
-  ; CHECK: [[AO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; CHECK: [[PO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
-  ; CHECK: [[CS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 6) to ptr), align 2
-  ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align 2
-  ; CHECK: [[AS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
-  ; CHECK: [[AS_NE:%.*]] = icmp ne i8 [[AS]], 0
-  ; CHECK: [[APO:%.*]] = select i1 [[AS_NE]], i32 [[AO]], i32 [[PO]]
-  ; CHECK: [[BS_NE:%.*]] = icmp ne i8 [[BS]], 0
-  ; CHECK: [[ABPO:%.*]] = select i1 [[BS_NE]], i32 [[BO]], i32 [[APO]]
-  ; CHECK: [[CS_NE:%.*]] = icmp ne i8 [[CS]], 0
-  ; CHECK: [[ABCPO:%.*]] = select i1 [[CS_NE]], i32 [[CO]], i32 [[ABPO]]
-  ; CHECK: store i32 [[ABCPO]], ptr @__dfsan_retval_origin_tls, align 4
-
+; CHECK-LABEL: define ptr @gepop(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 3), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 2), align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 6), align 2
+; CHECK-NEXT:    [[TMP6:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2
+; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; CHECK-NEXT:    [[TMP8:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[TMP9:%.*]] = or i8 [[TMP8]], [[TMP7]]
+; CHECK-NEXT:    [[TMP10:%.*]] = or i8 [[TMP9]], [[TMP6]]
+; CHECK-NEXT:    [[TMP11:%.*]] = or i8 [[TMP10]], [[TMP5]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i8 [[TMP7]], 0
+; CHECK-NEXT:    [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP3]], i32 [[TMP4]]
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i8 [[TMP6]], 0
+; CHECK-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP2]], i32 [[TMP13]]
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP1]], i32 [[TMP15]]
+; CHECK-NEXT:    [[E:%.*]] = getelementptr [10 x [20 x i32]], ptr [[P]], i32 [[A]], i32 [[B]], i32 [[C]]
+; CHECK-NEXT:    store i8 [[TMP11]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    store i32 [[TMP17]], ptr @__dfsan_retval_origin_tls, align 4
+; CHECK-NEXT:    ret ptr [[E]]
+;
   %e = getelementptr [10 x [20 x i32]], ptr %p, i32 %a, i32 %b, i32 %c
   ret ptr %e
 }
 
 define i32 @eeop(<4 x i32> %a, i32 %b) {
-  ; CHECK: @eeop.dfsan
-  ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
-  ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
-  ; CHECK: [[NE:%.*]] = icmp ne i8 [[BS]], 0
-  ; CHECK: [[MO:%.*]] = select i1 [[NE]], i32 [[BO]], i32 [[AO]]
-  ; CHECK: store i32 [[MO]], ptr @__dfsan_retval_origin_tls, align 4
-
+; CHECK-LABEL: define i32 @eeop(
+; CHECK-SAME: <4 x i32> [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = or i8 [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT:    [[E:%.*]] = extractelement <4 x i32> [[A]], i32 [[B]]
+; CHECK-NEXT:    store i8 [[TMP5]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    store i32 [[TMP7]], ptr @__dfsan_retval_origin_tls, align 4
+; CHECK-NEXT:    ret i32 [[E]]
+;
   %e = extractelement <4 x i32> %a, i32 %b
   ret i32 %e
 }
 
 define <4 x i32> @ieop(<4 x i32> %p, i32 %a, i32 %b) {
-  ; CHECK: @ieop.dfsan
-  ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 2), align 4
-  ; CHECK: [[AO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; CHECK: [[PO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
-  ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align 2
-  ; CHECK: [[AS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
-  ; CHECK: [[AS_NE:%.*]] = icmp ne i8 [[AS]], 0
-  ; CHECK: [[APO:%.*]] = select i1 [[AS_NE]], i32 [[AO]], i32 [[PO]]
-  ; CHECK: [[BS_NE:%.*]] = icmp ne i8 [[BS]], 0
-  ; CHECK: [[ABPO:%.*]] = select i1 [[BS_NE]], i32 [[BO]], i32 [[APO]]
-  ; CHECK: store i32 [[ABPO]], ptr @__dfsan_retval_origin_tls, align 4
-
+; CHECK-LABEL: define <4 x i32> @ieop(
+; CHECK-SAME: <4 x i32> [[P:%.*]], i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 2), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; CHECK-NEXT:    [[TMP6:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[TMP7:%.*]] = or i8 [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = or i8 [[TMP7]], [[TMP4]]
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne i8 [[TMP5]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP2]], i32 [[TMP3]]
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP1]], i32 [[TMP10]]
+; CHECK-NEXT:    [[E:%.*]] = insertelement <4 x i32> [[P]], i32 [[A]], i32 [[B]]
+; CHECK-NEXT:    store i8 [[TMP8]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    store i32 [[TMP12]], ptr @__dfsan_retval_origin_tls, align 4
+; CHECK-NEXT:    ret <4 x i32> [[E]]
+;
   %e = insertelement <4 x i32> %p, i32 %a, i32 %b
   ret <4 x i32> %e
 }
 
 define <4 x i32> @svop(<4 x i32> %a, <4 x i32> %b) {
-  ; CHECK: @svop.dfsan
-  ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
-  ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
-  ; CHECK: [[NE:%.*]] = icmp ne i8 [[BS]], 0
-  ; CHECK: [[MO:%.*]] = select i1 [[NE]], i32 [[BO]], i32 [[AO]]
-  ; CHECK: store i32 [[MO]], ptr @__dfsan_retval_origin_tls, align 4
-  
+; CHECK-LABEL: define <4 x i32> @svop(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = or i8 [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT:    [[E:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+; CHECK-NEXT:    store i8 [[TMP5]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    store i32 [[TMP7]], ptr @__dfsan_retval_origin_tls, align 4
+; CHECK-NEXT:    ret <4 x i32> [[E]]
+;
   %e = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
   ret <4 x i32> %e
-}  
+}
 
 define i32 @evop({i32, float} %a) {
-  ; CHECK: @evop.dfsan
-  ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
-  ; CHECK: store i32 [[AO]], ptr @__dfsan_retval_origin_tls, align 4
-
+; CHECK-LABEL: define i32 @evop(
+; CHECK-SAME: { i32, float } [[A:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i8, i8 } [[TMP2]], 0
+; CHECK-NEXT:    [[E:%.*]] = extractvalue { i32, float } [[A]], 0
+; CHECK-NEXT:    store i8 [[TMP3]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    store i32 [[TMP1]], ptr @__dfsan_retval_origin_tls, align 4
+; CHECK-NEXT:    ret i32 [[E]]
+;
   %e = extractvalue {i32, float} %a, 0
   ret i32 %e
 }
 
+; COMM: TODO simplify the expression 4 to 6, if shadow-tls-alignment is
+; COMM: updated to match shadow width bytes.
 define {i32, {float, float}} @ivop({i32, {float, float}} %a, {float, float} %b) {
-  ; CHECK: @ivop.dfsan
-  ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
-  ; COMM: TODO simplify the expression 4 to
-  ; COMM: 6, if shadow-tls-alignment is updated to match shadow
-  ; CHECK: [[BS:%.*]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align 2
-  ; CHECK: [[BS0:%.*]] = extractvalue { i8, i8 } [[BS]], 0
-  ; CHECK: [[BS1:%.*]] = extractvalue { i8, i8 } [[BS]], 1
-  ; CHECK: [[BS01:%.*]] = or i8 [[BS0]], [[BS1]]
-  ; CHECK: [[NE:%.*]] = icmp ne i8 [[BS01]], 0
-  ; CHECK: [[MO:%.*]] = select i1 [[NE]], i32 [[BO]], i32 [[AO]]
-  ; CHECK: store i32 [[MO]], ptr @__dfsan_retval_origin_tls, align 4
-  
+; CHECK-LABEL: define { i32, { float, float } } @ivop(
+; CHECK-SAME: { i32, { float, float } } [[A:%.*]], { float, float } [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2
+; CHECK-NEXT:    [[TMP4:%.*]] = load { i8, { i8, i8 } }, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = insertvalue { i8, { i8, i8 } } [[TMP4]], { i8, i8 } [[TMP3]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i8, i8 } [[TMP3]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i8, i8 } [[TMP3]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = or i8 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne i8 [[TMP8]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT:    [[E:%.*]] = insertvalue { i32, { float, float } } [[A]], { float, float } [[B]], 1
+; CHECK-NEXT:    store { i8, { i8, i8 } } [[TMP5]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    store i32 [[TMP10]], ptr @__dfsan_retval_origin_tls, align 4
+; CHECK-NEXT:    ret { i32, { float, float } } [[E]]
+;
   %e = insertvalue {i32, {float, float}} %a, {float, float} %b, 1
   ret {i32, {float, float}} %e
 }
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_phi.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_phi.ll
index e98dd2b..b69c383 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_phi.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_phi.ll
@@ -1,41 +1,50 @@
-; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1  -S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1 -dfsan-add-global-name-suffix=0 -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [[TLS_ARR:\[100 x i64\]]]
 define i32 @phiop(i32 %a, i32 %b, i1 %c) {
-  ; CHECK: @phiop.dfsan
-  ; CHECK: entry:
-  ; CHECK: [[BO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; CHECK: [[AO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
-  ; CHECK: [[BS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]]
-  ; CHECK: [[AS:%.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]]
-  ; CHECK: br i1 %c, label %next, label %done
-  ; CHECK: next:
-  ; CHECK: br i1 %c, label %T, label %F
-  ; CHECK: T:
-  ; CHECK: [[BS_NE:%.*]] = icmp ne i8 [[BS]], 0
-  ; CHECK: [[BAO_T:%.*]] = select i1 [[BS_NE]], i32 [[BO]], i32 [[AO]]
-  ; CHECK: br label %done
-  ; CHECK: F:
-  ; CHECK: [[AS_NE:%.*]] = icmp ne i8 [[AS]], 0
-  ; CHECK: [[BAO_F:%.*]] = select i1 [[AS_NE]], i32 [[AO]], i32 [[BO]]
-  ; CHECK: br label %done
-  ; CHECK: done:
-  ; CHECK: [[PO:%.*]] = phi i32 [ [[BAO_T]], %T ], [ [[BAO_F]], %F ], [ [[AO]], %entry ]
-  ; CHECK: store i32 [[PO]], ptr @__dfsan_retval_origin_tls, align 4
-
+; CHECK-LABEL: define i32 @phiop(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    br i1 [[C]], label %[[NEXT:.*]], label %[[DONE:.*]]
+; CHECK:       [[NEXT]]:
+; CHECK-NEXT:    br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
+; CHECK:       [[T]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = or i8 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP2]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP0]], i32 [[TMP1]]
+; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT:    br label %[[DONE]]
+; CHECK:       [[F]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = or i8 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP1]], i32 [[TMP0]]
+; CHECK-NEXT:    [[DIFF:%.*]] = sub i32 [[B]], [[A]]
+; CHECK-NEXT:    br label %[[DONE]]
+; CHECK:       [[DONE]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = phi i8 [ [[TMP4]], %[[T]] ], [ [[TMP7]], %[[F]] ], [ [[TMP3]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i32 [ [[TMP6]], %[[T]] ], [ [[TMP9]], %[[F]] ], [ [[TMP1]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[SUM]], %[[T]] ], [ [[DIFF]], %[[F]] ], [ [[A]], %[[ENTRY]] ]
+; CHECK-NEXT:    store i8 [[TMP10]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    store i32 [[TMP11]], ptr @__dfsan_retval_origin_tls, align 4
+; CHECK-NEXT:    ret i32 [[R]]
+;
 entry:
   br i1 %c, label %next, label %done
-next:  
-  br i1 %c, label %T, label %F 
+next:
+  br i1 %c, label %T, label %F
 T:
-  %sum = add i32 %a, %b 
+  %sum = add i32 %a, %b
   br label %done
 F:
-  %diff = sub i32 %b, %a 
+  %diff = sub i32 %b, %a
   br label %done
 done:
   %r = phi i32 [%sum, %T], [%diff, %F], [%a, %entry]
   ret i32 %r
-}
-\ No newline at end of file
+}
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_select.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_select.ll
index 133bf22..2839897 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_select.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_select.ll
@@ -48,7 +48,7 @@ define <4 x i8> @select8v(<4 x i1> %c, <4 x i8> %t, <4 x i8> %f) {
   ; TRACK_CONTROL_FLOW: [[CO:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
   ; TRACK_CONTROL_FLOW: [[FO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 2), align 4
   ; TRACK_CONTROL_FLOW: [[TO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; TRACK_CONTROL_FLOW: [[FS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align 2
+  ; TRACK_CONTROL_FLOW: [[FS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2
   ; TRACK_CONTROL_FLOW: [[CS:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
   ; TRACK_CONTROL_FLOW: [[FS_NE:%.*]] = icmp ne i8 [[FS]], 0
   ; TRACK_CONTROL_FLOW: [[FTO:%.*]] = select i1 [[FS_NE]], i32 [[FO]], i32 [[TO]]
@@ -59,11 +59,11 @@ define <4 x i8> @select8v(<4 x i1> %c, <4 x i8> %t, <4 x i8> %f) {
   ; NO_TRACK_CONTROL_FLOW: @select8v.dfsan
   ; NO_TRACK_CONTROL_FLOW: [[FO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 2), align 4
   ; NO_TRACK_CONTROL_FLOW: [[TO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; NO_TRACK_CONTROL_FLOW: [[FS:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align 2
+  ; NO_TRACK_CONTROL_FLOW: [[FS:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2
   ; NO_TRACK_CONTROL_FLOW: [[FS_NE:%.*]] = icmp ne i8 [[FS]], 0
   ; NO_TRACK_CONTROL_FLOW: [[FTO:%.*]] = select i1 [[FS_NE]], i32 [[FO]], i32 [[TO]]
   ; NO_TRACK_CONTROL_FLOW: store i32 [[FTO]], ptr @__dfsan_retval_origin_tls, align 4
 
   %a = select <4 x i1> %c, <4 x i8> %t, <4 x i8> %f
   ret <4 x i8> %a
-}
-\ No newline at end of file
+}
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll
index 0b0ba40..55b0a01 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_store.ll
@@ -75,7 +75,7 @@ define void @store64_align8(ptr %p, i64 %a) {
   ; COMBINE_STORE_PTR-NEXT: %[[#PS:]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]]
 
   ; CHECK-NEXT:  %[[#AO:]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; CHECK-NEXT:  %[[#AS:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
+  ; CHECK-NEXT:  %[[#AS:]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]]
 
   ; COMBINE_STORE_PTR-NEXT: %[[#AS:]] = or i8 %[[#AS]], %[[#PS]]
   ; COMBINE_STORE_PTR-NEXT: %[[#NE:]] = icmp ne i8 %[[#PS]], 0
@@ -104,7 +104,7 @@ define void @store64_align2(ptr %p, i64 %a) {
   ; COMBINE_STORE_PTR-NEXT: %[[#PS:]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]]
 
   ; CHECK-NEXT: %[[#AO:]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; CHECK-NEXT: %[[#AS:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
+  ; CHECK-NEXT: %[[#AS:]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]]
 
   ; COMBINE_STORE_PTR-NEXT: %[[#AS:]] = or i8 %[[#AS]], %[[#PS]]
   ; COMBINE_STORE_PTR-NEXT: %[[#NE:]] = icmp ne i8 %[[#PS]], 0
@@ -131,7 +131,7 @@ define void @store96_align8(ptr %p, i96 %a) {
   ; COMBINE_STORE_PTR-NEXT: %[[#PS:]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]]
 
   ; CHECK-NEXT: %[[#AO:]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; CHECK-NEXT: %[[#AS:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
+  ; CHECK-NEXT: %[[#AS:]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]]
 
   ; COMBINE_STORE_PTR-NEXT: %[[#AS:]] = or i8 %[[#AS]], %[[#PS]]
   ; COMBINE_STORE_PTR-NEXT: %[[#NE:]] = icmp ne i8 %[[#PS]], 0
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_store_threshold.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_store_threshold.ll
index 3630ebc..8b526f1 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_store_threshold.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_store_threshold.ll
@@ -1,16 +1,37 @@
-; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1  -dfsan-instrument-with-call-threshold=0 -S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=dfsan -dfsan-track-origins=1  -dfsan-instrument-with-call-threshold=0 -dfsan-add-global-name-suffix=0 -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 define void @store_threshold(ptr %p, [2 x i64] %a) {
-  ; CHECK: @store_threshold.dfsan
-  ; CHECK: [[AO:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; CHECK: [[AS:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
-  ; CHECK: [[AS0:%.*]] = extractvalue [2 x i8] [[AS]], 0
-  ; CHECK: [[AS1:%.*]] = extractvalue [2 x i8] [[AS]], 1
-  ; CHECK: [[AS01:%.*]] = or i8 [[AS0]], [[AS1]]
-  ; CHECK: call void @__dfsan_maybe_store_origin(i8 [[AS01]], ptr %p, i64 16, i32 [[AO]])
-  ; CHECK: store [2 x i64] %a, ptr %p, align 8
+; CHECK-LABEL: define void @store_threshold(
+; CHECK-SAME: ptr [[P:%.*]], [2 x i64] [[A:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([200 x i32], ptr @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [2 x i8] [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [2 x i8] [[TMP2]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = or i8 [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
+; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[TMP7]], 17592186044416
+; CHECK-NEXT:    [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP11]], i8 [[TMP5]], i32 1
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <8 x i8> [[TMP12]], i8 [[TMP5]], i32 2
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP5]], i32 3
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <8 x i8> [[TMP14]], i8 [[TMP5]], i32 4
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <8 x i8> [[TMP15]], i8 [[TMP5]], i32 5
+; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <8 x i8> [[TMP16]], i8 [[TMP5]], i32 6
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP17]], i8 [[TMP5]], i32 7
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr <8 x i8>, ptr [[TMP8]], i32 0
+; CHECK-NEXT:    store <8 x i8> [[TMP18]], ptr [[TMP19]], align 1
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr <8 x i8>, ptr [[TMP8]], i32 1
+; CHECK-NEXT:    store <8 x i8> [[TMP18]], ptr [[TMP20]], align 1
+; CHECK-NEXT:    call void @__dfsan_maybe_store_origin(i8 [[TMP5]], ptr [[P]], i64 16, i32 [[TMP1]])
+; CHECK-NEXT:    store [2 x i64] [[A]], ptr [[P]], align 8
+; CHECK-NEXT:    ret void
+;
 
   store [2 x i64] %a, ptr %p
   ret void
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_track_load.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_track_load.ll
index b93d2eb..f967ccf 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_track_load.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_track_load.ll
@@ -1,27 +1,26 @@
-; RUN: opt < %s -passes=dfsan -dfsan-track-origins=2 -S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=dfsan -dfsan-track-origins=2 -dfsan-add-global-name-suffix=0 -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 define i64 @load64(ptr %p) {
-  ; CHECK-LABEL: @load64.dfsan
-
-  ; CHECK-NEXT: %[[#PO:]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
-  ; CHECK-NEXT: %[[#PS:]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN:2]]
-
-  ; CHECK-NEXT: %[[#LABEL_ORIGIN:]] = call zeroext i64 @__dfsan_load_label_and_origin(ptr %p, i64 8)
-  ; CHECK-NEXT: %[[#LABEL_ORIGIN_H32:]] = lshr i64 %[[#LABEL_ORIGIN]], 32
-  ; CHECK-NEXT: %[[#LABEL:]] = trunc i64 %[[#LABEL_ORIGIN_H32]] to i8
-  ; CHECK-NEXT: %[[#ORIGIN:]] = trunc i64 %[[#LABEL_ORIGIN]] to i32
-  ; CHECK-NEXT: %[[#ORIGIN_CHAINED:]] = call i32 @__dfsan_chain_origin_if_tainted(i8 %[[#LABEL]], i32 %[[#ORIGIN]])
-
-  ; CHECK-NEXT: %[[#LABEL:]] = or i8 %[[#LABEL]], %[[#PS]]
-  ; CHECK-NEXT: %[[#NZ:]] = icmp ne i8 %[[#PS]], 0
-  ; CHECK-NEXT: %[[#ORIGIN_SEL:]] = select i1 %[[#NZ]], i32 %[[#PO]], i32 %[[#ORIGIN_CHAINED]]
-
-  ; CHECK-NEXT: %a = load i64, ptr %p
-  ; CHECK-NEXT: store i8 %[[#LABEL]], ptr @__dfsan_retval_tls, align [[ALIGN]]
-  ; CHECK-NEXT: store i32 %[[#ORIGIN_SEL]], ptr @__dfsan_retval_origin_tls, align 4
-
+; CHECK-LABEL: define i64 @load64(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__dfsan_arg_origin_tls, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = call zeroext i64 @__dfsan_load_label_and_origin(ptr [[P]], i64 8)
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr i64 [[TMP3]], 32
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i8
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @__dfsan_chain_origin_if_tainted(i8 [[TMP5]], i32 [[TMP6]])
+; CHECK-NEXT:    [[TMP8:%.*]] = or i8 [[TMP5]], [[TMP2]]
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne i8 [[TMP2]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP1]], i32 [[TMP7]]
+; CHECK-NEXT:    [[A:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT:    store i8 [[TMP8]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    store i32 [[TMP10]], ptr @__dfsan_retval_origin_tls, align 4
+; CHECK-NEXT:    ret i64 [[A]]
+;
   %a = load i64, ptr %p
   ret i64 %a
 }
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/phi.ll b/llvm/test/Instrumentation/DataFlowSanitizer/phi.ll
index 592d3eb..ecf0d9c8 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/phi.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/phi.ll
@@ -1,26 +1,41 @@
-; RUN: opt < %s -passes=dfsan -S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=dfsan -dfsan-add-global-name-suffix=0 -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 define {i32, i32} @test({i32, i32} %a, i1 %c) {
-  ; CHECK: %[[#AL:]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN:2]]
-  ; CHECK: %[[#AL0:]] = insertvalue { i8, i8 } %[[#AL]], i8 0, 0
-  ; CHECK: %[[#AL1:]] = insertvalue { i8, i8 } %[[#AL]], i8 0, 1
-  ; CHECK: %[[#PL:]] = phi { i8, i8 } [ %[[#AL0]], %T ], [ %[[#AL1]], %F ]
-  ; CHECK: store { i8, i8 } %[[#PL]], ptr @__dfsan_retval_tls, align [[ALIGN]]
+; CHECK-LABEL: define { i32, i32 } @test(
+; CHECK-SAME: { i32, i32 } [[A:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
+; CHECK:       [[T]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i8, i8 } [[TMP0]], i8 0, 0
+; CHECK-NEXT:    [[AT:%.*]] = insertvalue { i32, i32 } [[A]], i32 1, 0
+; CHECK-NEXT:    br label %[[DONE:.*]]
+; CHECK:       [[F]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i8, i8 } [[TMP0]], i8 0, 1
+; CHECK-NEXT:    [[AF:%.*]] = insertvalue { i32, i32 } [[A]], i32 1, 1
+; CHECK-NEXT:    br label %[[DONE]]
+; CHECK:       [[DONE]]:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi { i8, i8 } [ [[TMP1]], %[[T]] ], [ [[TMP2]], %[[F]] ]
+; CHECK-NEXT:    [[B:%.*]] = phi { i32, i32 } [ [[AT]], %[[T]] ], [ [[AF]], %[[F]] ]
+; CHECK-NEXT:    store { i8, i8 } [[TMP3]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    ret { i32, i32 } [[B]]
+;
 
 entry:
   br i1 %c, label %T, label %F
-  
+
 T:
   %at = insertvalue {i32, i32} %a, i32 1, 0
   br label %done
-  
+
 F:
   %af = insertvalue {i32, i32} %a, i32 1, 1
   br label %done
-  
+
 done:
   %b = phi {i32, i32} [%at, %T], [%af, %F]
-  ret {i32, i32} %b  
+  ret {i32, i32} %b
 }
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/select.ll b/llvm/test/Instrumentation/DataFlowSanitizer/select.ll
index 5056616..005648b 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/select.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/select.ll
@@ -1,74 +1,81 @@
-; RUN: opt < %s -passes=dfsan -dfsan-track-select-control-flow=true -S | FileCheck %s --check-prefixes=CHECK,TRACK_CF
-; RUN: opt < %s -passes=dfsan -dfsan-track-select-control-flow=false -S | FileCheck %s --check-prefixes=CHECK,NO_TRACK_CF
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=dfsan -dfsan-track-select-control-flow=true -dfsan-add-global-name-suffix=0 -S | FileCheck %s --check-prefixes=CHECK,TRACK_CF
+; RUN: opt < %s -passes=dfsan -dfsan-track-select-control-flow=false -dfsan-add-global-name-suffix=0 -S | FileCheck %s --check-prefixes=CHECK,NO_TRACK_CF
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [[TLS_ARR:\[100 x i64\]]]
-; CHECK: @__dfsan_retval_tls = external thread_local(initialexec) global [[TLS_ARR]]
 define i8 @select8(i1 %c, i8 %t, i8 %f) {
-  ; TRACK_CF: @select8.dfsan
-  ; TRACK_CF: %[[#R:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]]
-  ; TRACK_CF: %[[#R+1]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
-  ; TRACK_CF: %[[#R+2]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]]
-  ; TRACK_CF: %[[#R+3]] = select i1 %c, i8 %[[#R+1]], i8 %[[#R]]
-  ; TRACK_CF: %[[#RO:]] = or i8 %[[#R+2]], %[[#R+3]]
-  ; TRACK_CF: %a = select i1 %c, i8 %t, i8 %f
-  ; TRACK_CF: store i8 %[[#RO]], ptr @__dfsan_retval_tls, align [[ALIGN]]
-  ; TRACK_CF: ret i8 %a
-
-  ; NO_TRACK_CF: @select8.dfsan
-  ; NO_TRACK_CF: %[[#R:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]]
-  ; NO_TRACK_CF: %[[#R+1]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
-  ; NO_TRACK_CF: %[[#R+2]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]]
-  ; NO_TRACK_CF: %[[#R+3]] = select i1 %c, i8 %[[#R+1]], i8 %[[#R]]
-  ; NO_TRACK_CF: %a = select i1 %c, i8 %t, i8 %f
-  ; NO_TRACK_CF: store i8 %[[#R+3]], ptr @__dfsan_retval_tls, align [[ALIGN]]
-  ; NO_TRACK_CF: ret i8 %a
-
+; TRACK_CF-LABEL: define i8 @select8(
+; TRACK_CF-SAME: i1 [[C:%.*]], i8 [[T:%.*]], i8 [[F:%.*]]) {
+; TRACK_CF-NEXT:    [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2
+; TRACK_CF-NEXT:    [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; TRACK_CF-NEXT:    [[TMP3:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; TRACK_CF-NEXT:    [[TMP4:%.*]] = select i1 [[C]], i8 [[TMP2]], i8 [[TMP1]]
+; TRACK_CF-NEXT:    [[TMP5:%.*]] = or i8 [[TMP3]], [[TMP4]]
+; TRACK_CF-NEXT:    [[A:%.*]] = select i1 [[C]], i8 [[T]], i8 [[F]]
+; TRACK_CF-NEXT:    store i8 [[TMP5]], ptr @__dfsan_retval_tls, align 2
+; TRACK_CF-NEXT:    ret i8 [[A]]
+;
+; NO_TRACK_CF-LABEL: define i8 @select8(
+; NO_TRACK_CF-SAME: i1 [[C:%.*]], i8 [[T:%.*]], i8 [[F:%.*]]) {
+; NO_TRACK_CF-NEXT:    [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2
+; NO_TRACK_CF-NEXT:    [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; NO_TRACK_CF-NEXT:    [[TMP3:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; NO_TRACK_CF-NEXT:    [[TMP4:%.*]] = select i1 [[C]], i8 [[TMP2]], i8 [[TMP1]]
+; NO_TRACK_CF-NEXT:    [[A:%.*]] = select i1 [[C]], i8 [[T]], i8 [[F]]
+; NO_TRACK_CF-NEXT:    store i8 [[TMP4]], ptr @__dfsan_retval_tls, align 2
+; NO_TRACK_CF-NEXT:    ret i8 [[A]]
+;
   %a = select i1 %c, i8 %t, i8 %f
   ret i8 %a
 }
 
 define i8 @select8e(i1 %c, i8 %tf) {
-  ; TRACK_CF: @select8e.dfsan
-  ; TRACK_CF: %[[#R:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
-  ; TRACK_CF: %[[#R+1]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]]
-  ; TRACK_CF: %[[#RO:]] = or i8 %[[#R+1]], %[[#R]]
-  ; TRACK_CF: %a = select i1 %c, i8 %tf, i8 %tf
-  ; TRACK_CF: store i8 %[[#RO]], ptr @__dfsan_retval_tls, align [[ALIGN]]
-  ; TRACK_CF: ret i8 %a
-
-  ; NO_TRACK_CF: @select8e.dfsan
-  ; NO_TRACK_CF: %[[#R:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
-  ; NO_TRACK_CF: %[[#R+1]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]]
-  ; NO_TRACK_CF: %a = select i1 %c, i8 %tf, i8 %tf
-  ; NO_TRACK_CF: store i8 %[[#R]], ptr @__dfsan_retval_tls, align [[ALIGN]]
-  ; NO_TRACK_CF: ret i8 %a
-
+; TRACK_CF-LABEL: define i8 @select8e(
+; TRACK_CF-SAME: i1 [[C:%.*]], i8 [[TF:%.*]]) {
+; TRACK_CF-NEXT:    [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; TRACK_CF-NEXT:    [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; TRACK_CF-NEXT:    [[TMP3:%.*]] = or i8 [[TMP2]], [[TMP1]]
+; TRACK_CF-NEXT:    [[A:%.*]] = select i1 [[C]], i8 [[TF]], i8 [[TF]]
+; TRACK_CF-NEXT:    store i8 [[TMP3]], ptr @__dfsan_retval_tls, align 2
+; TRACK_CF-NEXT:    ret i8 [[A]]
+;
+; NO_TRACK_CF-LABEL: define i8 @select8e(
+; NO_TRACK_CF-SAME: i1 [[C:%.*]], i8 [[TF:%.*]]) {
+; NO_TRACK_CF-NEXT:    [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; NO_TRACK_CF-NEXT:    [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; NO_TRACK_CF-NEXT:    [[A:%.*]] = select i1 [[C]], i8 [[TF]], i8 [[TF]]
+; NO_TRACK_CF-NEXT:    store i8 [[TMP1]], ptr @__dfsan_retval_tls, align 2
+; NO_TRACK_CF-NEXT:    ret i8 [[A]]
+;
   %a = select i1 %c, i8 %tf, i8 %tf
   ret i8 %a
 }
 
 define <4 x i8> @select8v(<4 x i1> %c, <4 x i8> %t, <4 x i8> %f) {
-  ; TRACK_CF: @select8v.dfsan
-  ; TRACK_CF: %[[#R:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]]
-  ; TRACK_CF: %[[#R+1]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
-  ; TRACK_CF: %[[#R+2]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]]
-  ; TRACK_CF: %[[#R+3]] = or i8 %[[#R+1]], %[[#R]]
-  ; TRACK_CF: %[[#RO:]] = or i8 %[[#R+2]], %[[#R+3]]
-  ; TRACK_CF: %a = select <4 x i1> %c, <4 x i8> %t, <4 x i8> %f
-  ; TRACK_CF: store i8 %[[#RO]], ptr @__dfsan_retval_tls, align [[ALIGN]]
-  ; TRACK_CF: ret <4 x i8> %a
-
-  ; NO_TRACK_CF: @select8v.dfsan
-  ; NO_TRACK_CF: %[[#R:]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]]
-  ; NO_TRACK_CF: %[[#R+1]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
-  ; NO_TRACK_CF: %[[#R+2]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]]
-  ; NO_TRACK_CF: %[[#RO:]] = or i8 %[[#R+1]], %[[#R]]
-  ; NO_TRACK_CF: %a = select <4 x i1> %c, <4 x i8> %t, <4 x i8> %f
-  ; NO_TRACK_CF: store i8 %[[#RO]], ptr @__dfsan_retval_tls, align [[ALIGN]]
-  ; NO_TRACK_CF: ret <4 x i8> %a
-
+; TRACK_CF-LABEL: define <4 x i8> @select8v(
+; TRACK_CF-SAME: <4 x i1> [[C:%.*]], <4 x i8> [[T:%.*]], <4 x i8> [[F:%.*]]) {
+; TRACK_CF-NEXT:    [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2
+; TRACK_CF-NEXT:    [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; TRACK_CF-NEXT:    [[TMP3:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; TRACK_CF-NEXT:    [[TMP4:%.*]] = or i8 [[TMP2]], [[TMP1]]
+; TRACK_CF-NEXT:    [[TMP5:%.*]] = or i8 [[TMP3]], [[TMP4]]
+; TRACK_CF-NEXT:    [[A:%.*]] = select <4 x i1> [[C]], <4 x i8> [[T]], <4 x i8> [[F]]
+; TRACK_CF-NEXT:    store i8 [[TMP5]], ptr @__dfsan_retval_tls, align 2
+; TRACK_CF-NEXT:    ret <4 x i8> [[A]]
+;
+; NO_TRACK_CF-LABEL: define <4 x i8> @select8v(
+; NO_TRACK_CF-SAME: <4 x i1> [[C:%.*]], <4 x i8> [[T:%.*]], <4 x i8> [[F:%.*]]) {
+; NO_TRACK_CF-NEXT:    [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align 2
+; NO_TRACK_CF-NEXT:    [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; NO_TRACK_CF-NEXT:    [[TMP3:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; NO_TRACK_CF-NEXT:    [[TMP4:%.*]] = or i8 [[TMP2]], [[TMP1]]
+; NO_TRACK_CF-NEXT:    [[A:%.*]] = select <4 x i1> [[C]], <4 x i8> [[T]], <4 x i8> [[F]]
+; NO_TRACK_CF-NEXT:    store i8 [[TMP4]], ptr @__dfsan_retval_tls, align 2
+; NO_TRACK_CF-NEXT:    ret <4 x i8> [[A]]
+;
   %a = select <4 x i1> %c, <4 x i8> %t, <4 x i8> %f
   ret <4 x i8> %a
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/store.ll b/llvm/test/Instrumentation/DataFlowSanitizer/store.ll
index bc2a70e..1c8ab65 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/store.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/store.ll
@@ -16,7 +16,7 @@ define void @store0({} %v, ptr %p) {
 define void @store8(i8 %v, ptr %p) {
   ; CHECK-LABEL:       @store8.dfsan
   ; NO_COMBINE_PTR_LABEL: load i8, ptr @__dfsan_arg_tls
-  ; COMBINE_PTR_LABEL:    load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
+  ; COMBINE_PTR_LABEL:    load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
 
   ; COMBINE_PTR_LABEL: load i8, ptr @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: or i8
@@ -35,7 +35,7 @@ define void @store8(i8 %v, ptr %p) {
 define void @store16(i16 %v, ptr %p) {
   ; CHECK-LABEL:       @store16.dfsan
   ; NO_COMBINE_PTR_LABEL: load i8, ptr @__dfsan_arg_tls
-  ; COMBINE_PTR_LABEL:    load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
+  ; COMBINE_PTR_LABEL:    load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
   ; COMBINE_PTR_LABEL: load i8, ptr @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: or i8
   ; CHECK:             ptrtoint ptr {{.*}} i64
@@ -55,7 +55,7 @@ define void @store16(i16 %v, ptr %p) {
 define void @store32(i32 %v, ptr %p) {
   ; CHECK-LABEL:       @store32.dfsan
   ; NO_COMBINE_PTR_LABEL: load i8, ptr @__dfsan_arg_tls
-  ; COMBINE_PTR_LABEL:    load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
+  ; COMBINE_PTR_LABEL:    load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
   ; COMBINE_PTR_LABEL: load i8, ptr @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: or i8
   ; CHECK:             ptrtoint ptr {{.*}} i64
@@ -79,7 +79,7 @@ define void @store32(i32 %v, ptr %p) {
 define void @store64(i64 %v, ptr %p) {
   ; CHECK-LABEL:       @store64.dfsan
   ; NO_COMBINE_PTR_LABEL: load i8, ptr @__dfsan_arg_tls
-  ; COMBINE_PTR_LABEL:    load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align 2
+  ; COMBINE_PTR_LABEL:    load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
   ; COMBINE_PTR_LABEL: load i8, ptr @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: or i8
   ; CHECK:             ptrtoint ptr {{.*}} i64
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/struct.ll b/llvm/test/Instrumentation/DataFlowSanitizer/struct.ll
index 8069d28..9b4a350 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/struct.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/struct.ll
@@ -56,15 +56,15 @@ define {i1, i32} @load_global_struct() {
 
 define {i1, i32} @select_struct(i1 %c, {i1, i32} %a, {i1, i32} %b) {
   ; NO_SELECT_CONTROL: @select_struct.dfsan
-  ; NO_SELECT_CONTROL: [[B:%.*]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]]
-  ; NO_SELECT_CONTROL: [[A:%.*]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
+  ; NO_SELECT_CONTROL: [[B:%.*]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align [[ALIGN:2]]
+  ; NO_SELECT_CONTROL: [[A:%.*]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]]
   ; NO_SELECT_CONTROL: [[C:%.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]]
   ; NO_SELECT_CONTROL: [[S:%.*]] = select i1 %c, { i8, i8 } [[A]], { i8, i8 } [[B]]
   ; NO_SELECT_CONTROL: store { i8, i8 } [[S]], ptr @__dfsan_retval_tls, align [[ALIGN]]
 
   ; FAST: @select_struct.dfsan
-  ; FAST: %[[#R:]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]]
-  ; FAST: %[[#R+1]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
+  ; FAST: %[[#R:]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align [[ALIGN:2]]
+  ; FAST: %[[#R+1]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]]
   ; FAST: %[[#R+2]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]]
   ; FAST: %[[#R+3]] = select i1 %c, { i8, i8 } %[[#R+1]], { i8, i8 } %[[#R]]
   ; FAST: %[[#R+4]] = extractvalue { i8, i8 } %[[#R+3]], 0
@@ -81,7 +81,7 @@ define {i1, i32} @select_struct(i1 %c, {i1, i32} %a, {i1, i32} %b) {
 
 define { i32, i32 } @asm_struct(i32 %0, i32 %1) {
   ; FAST: @asm_struct.dfsan
-  ; FAST: [[E1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]]
+  ; FAST: [[E1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]]
   ; FAST: [[E0:%.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]]
   ; FAST: [[E01:%.*]] = or i8 [[E0]], [[E1]]
   ; FAST: [[S0:%.*]] = insertvalue { i8, i8 } undef, i8 [[E01]], 0
@@ -111,7 +111,7 @@ define i1 @extract_struct({i1, i5} %s) {
 
 define {i1, i5} @insert_struct({i1, i5} %s, i5 %e1) {
   ; FAST: @insert_struct.dfsan
-  ; FAST: [[EM:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]]
+  ; FAST: [[EM:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]]
   ; FAST: [[SM:%.*]] = load { i8, i8 }, ptr @__dfsan_arg_tls, align [[ALIGN]]
   ; FAST: [[SM1:%.*]] = insertvalue { i8, i8 } [[SM]], i8 [[EM]], 1
   ; FAST: store { i8, i8 } [[SM1]], ptr @__dfsan_retval_tls, align [[ALIGN]]
@@ -138,7 +138,7 @@ define {i1, i1} @load_struct(ptr %p) {
 
 define void @store_struct(ptr %p, {i1, i1} %s) {
   ; FAST: @store_struct.dfsan
-  ; FAST: [[S:%.*]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]]
+  ; FAST: [[S:%.*]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN:2]]
   ; FAST: [[E0:%.*]] = extractvalue { i8, i8 } [[S]], 0
   ; FAST: [[E1:%.*]] = extractvalue { i8, i8 } [[S]], 1
   ; FAST: [[E:%.*]] = or i8 [[E0]], [[E1]]
@@ -153,7 +153,7 @@ define void @store_struct(ptr %p, {i1, i1} %s) {
 
   ; COMBINE_STORE_PTR: @store_struct.dfsan
   ; COMBINE_STORE_PTR: [[PL:%.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN:2]]
-  ; COMBINE_STORE_PTR: [[SL:%.*]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
+  ; COMBINE_STORE_PTR: [[SL:%.*]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]]
   ; COMBINE_STORE_PTR: [[SL0:%.*]] = extractvalue { i8, i8 } [[SL]], 0
   ; COMBINE_STORE_PTR: [[SL1:%.*]] = extractvalue { i8, i8 } [[SL]], 1
   ; COMBINE_STORE_PTR: [[SL01:%.*]] = or i8 [[SL0]], [[SL1]]
@@ -215,7 +215,7 @@ define i1 @extract_struct_of_aggregate31(%StructOfAggr %s) {
 
 define %StructOfAggr @insert_struct_of_aggregate11(%StructOfAggr %s, i2 %e11) {
   ; FAST: @insert_struct_of_aggregate11.dfsan
-  ; FAST: [[E11:%.*]]  = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 8) to ptr), align [[ALIGN:2]]
+  ; FAST: [[E11:%.*]]  = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 8), align [[ALIGN:2]]
   ; FAST: [[S:%.*]] = load { i8, [4 x i8], i8, { i8, i8 } }, ptr @__dfsan_arg_tls, align [[ALIGN]]
   ; FAST: [[S1:%.*]] = insertvalue { i8, [4 x i8], i8, { i8, i8 } } [[S]], i8 [[E11]], 1, 1
   ; FAST: store { i8, [4 x i8], i8, { i8, i8 } } [[S1]], ptr @__dfsan_retval_tls, align [[ALIGN]]
@@ -239,12 +239,12 @@ declare %StructOfAggr @fun_with_many_aggr_args(<2 x i7> %v, [2 x i5] %a, {i3, i3
 
 define %StructOfAggr @call_many_aggr_args(<2 x i7> %v, [2 x i5] %a, {i3, i3} %s) {
   ; FAST: @call_many_aggr_args.dfsan
-  ; FAST: [[S:%.*]] = load { i8, i8 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN:2]]
-  ; FAST: [[A:%.*]] = load [2 x i8], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
+  ; FAST: [[S:%.*]] = load { i8, i8 }, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align [[ALIGN:2]]
+  ; FAST: [[A:%.*]] = load [2 x i8], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]]
   ; FAST: [[V:%.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]]
   ; FAST: store i8 [[V]], ptr @__dfsan_arg_tls, align [[ALIGN]]
-  ; FAST: store [2 x i8] [[A]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN]]
-  ; FAST: store { i8, i8 } [[S]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 4) to ptr), align [[ALIGN]]
+  ; FAST: store [2 x i8] [[A]], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align [[ALIGN]]
+  ; FAST: store { i8, i8 } [[S]], ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 4), align [[ALIGN]]
   ; FAST: %_dfsret = load { i8, [4 x i8], i8, { i8, i8 } }, ptr @__dfsan_retval_tls, align [[ALIGN]]
   ; FAST: store { i8, [4 x i8], i8, { i8, i8 } } %_dfsret, ptr @__dfsan_retval_tls, align [[ALIGN]]
 
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/vector.ll b/llvm/test/Instrumentation/DataFlowSanitizer/vector.ll
index 64052d6..0580c18 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/vector.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/vector.ll
@@ -1,19 +1,43 @@
-; RUN: opt < %s -passes=dfsan -S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=dfsan -dfsan-add-global-name-suffix=0 -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 define <4 x i4> @pass_vector(<4 x i4> %v) {
-  ; CHECK-LABEL: @pass_vector.dfsan
-  ; CHECK-NEXT: %[[#REG:]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN:2]]
-  ; CHECK-NEXT: store i8 %[[#REG]], ptr @__dfsan_retval_tls, align [[ALIGN]]
-  ; CHECK-NEXT: ret <4 x i4> %v
+; CHECK-LABEL: define <4 x i4> @pass_vector(
+; CHECK-SAME: <4 x i4> [[V:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    store i8 [[TMP1]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    ret <4 x i4> [[V]]
+;
   ret <4 x i4> %v
 }
 
 define void @load_update_store_vector(ptr %p) {
-  ; CHECK-LABEL: @load_update_store_vector.dfsan
-  ; CHECK: {{.*}} = load i8, ptr @__dfsan_arg_tls, align 2
-
+; CHECK-LABEL: define void @load_update_store_vector(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080
+; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 1
+; CHECK-NEXT:    [[TMP6:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT:    [[TMP8:%.*]] = or i8 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = or i8 [[TMP8]], [[TMP1]]
+; CHECK-NEXT:    [[V:%.*]] = load <4 x i4>, ptr [[P]], align 2
+; CHECK-NEXT:    [[E2:%.*]] = extractelement <4 x i4> [[V]], i32 2
+; CHECK-NEXT:    [[V1:%.*]] = insertelement <4 x i4> [[V]], i4 [[E2]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 87960930222080
+; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0
+; CHECK-NEXT:    store i8 [[TMP9]], ptr [[TMP13]], align 1
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP12]], i32 1
+; CHECK-NEXT:    store i8 [[TMP9]], ptr [[TMP14]], align 1
+; CHECK-NEXT:    store <4 x i4> [[V1]], ptr [[P]], align 2
+; CHECK-NEXT:    ret void
+;
   %v = load <4 x i4>, ptr %p
   %e2 = extractelement <4 x i4> %v, i32 2
   %v1 = insertelement <4 x i4> %v, i4 %e2, i32 0
@@ -22,36 +46,37 @@ define void @load_update_store_vector(ptr %p) {
 }
 
 define <4 x i1> @icmp_vector(<4 x i8> %a, <4 x i8> %b) {
-  ; CHECK-LABEL: @icmp_vector.dfsan
-  ; CHECK-NEXT: %[[B:.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__dfsan_arg_tls to i64), i64 2) to ptr), align [[ALIGN:2]]
-  ; CHECK-NEXT: %[[A:.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN]]
-  ; CHECK:       %[[L:.*]] = or i8 %[[A]], %[[B]]
-
-  ; CHECK: %r = icmp eq <4 x i8> %a, %b
-  ; CHECK: store i8 %[[L]], ptr @__dfsan_retval_tls, align [[ALIGN]]
-  ; CHECK: ret <4 x i1> %r
-
+; CHECK-LABEL: define <4 x i1> @icmp_vector(
+; CHECK-SAME: <4 x i8> [[A:%.*]], <4 x i8> [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr getelementptr (i8, ptr @__dfsan_arg_tls, i64 2), align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = or i8 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i8> [[A]], [[B]]
+; CHECK-NEXT:    store i8 [[TMP3]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    ret <4 x i1> [[R]]
+;
   %r = icmp eq <4 x i8> %a, %b
   ret <4 x i1> %r
 }
 
 define <2 x i32> @const_vector() {
-  ; CHECK-LABEL: @const_vector.dfsan
-  ; CHECK-NEXT: store i8 0, ptr @__dfsan_retval_tls, align 2
-  ; CHECK-NEXT: ret <2 x i32> <i32 42, i32 11>
-
+; CHECK-LABEL: define <2 x i32> @const_vector() {
+; CHECK-NEXT:    store i8 0, ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    ret <2 x i32> <i32 42, i32 11>
+;
   ret <2 x i32> < i32 42, i32 11 >
 }
 
 define <4 x i4> @call_vector(<4 x i4> %v) {
-  ; CHECK-LABEL: @call_vector.dfsan
-  ; CHECK-NEXT: %[[V:.*]] = load i8, ptr @__dfsan_arg_tls, align [[ALIGN:2]]
-  ; CHECK-NEXT: store i8 %[[V]], ptr @__dfsan_arg_tls, align [[ALIGN]]
-  ; CHECK-NEXT: %r = call <4 x i4> @pass_vector.dfsan(<4 x i4> %v)
-  ; CHECK-NEXT: %_dfsret = load i8, ptr @__dfsan_retval_tls, align [[ALIGN]]
-  ; CHECK-NEXT: store i8 %_dfsret, ptr @__dfsan_retval_tls, align [[ALIGN]]
-  ; CHECK-NEXT: ret <4 x i4> %r
-
+; CHECK-LABEL: define <4 x i4> @call_vector(
+; CHECK-SAME: <4 x i4> [[V:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    store i8 [[TMP1]], ptr @__dfsan_arg_tls, align 2
+; CHECK-NEXT:    [[R:%.*]] = call <4 x i4> @pass_vector(<4 x i4> [[V]])
+; CHECK-NEXT:    [[_DFSRET:%.*]] = load i8, ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    store i8 [[_DFSRET]], ptr @__dfsan_retval_tls, align 2
+; CHECK-NEXT:    ret <4 x i4> [[R]]
+;
   %r = call <4 x i4> @pass_vector(<4 x i4> %v)
   ret <4 x i4> %r
 }
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s b/llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s
index a21f762..73653d0 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s
@@ -1,64 +1,71 @@
-// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefix=GFX1250 %s
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefixes=GFX1250,GFX1250-ASM %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250,GFX1250-DIS %s
 
 s_mov_b64 s[2:3], 0x10abcdef12345678
-// GFX1250: s_mov_b64 s[2:3], lit64(0x10abcdef12345678)    ; encoding: [0xfe,0x01,0x82,0xbe,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_mov_b64 s[2:3], 0x10abcdef12345678    ; encoding: [0xfe,0x01,0x82,0xbe,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_add_nc_u64 s[2:3], s[4:5], 0x10abcdef12345678
-// GFX1250: s_add_nc_u64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0xa9,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_add_nc_u64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0xa9,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_mul_u64 s[2:3], 0x10abcdef12345678, s[4:5]
-// GFX1250: s_mul_u64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0xaa,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_mul_u64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0xaa,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_and_b64 s[2:3], 0x10abcdef12345678, s[4:5]
-// GFX1250: s_and_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x8b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_and_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x8b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_or_b64 s[2:3], s[4:5], 0x10abcdef12345678
-// GFX1250: s_or_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_or_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_xor_b64 s[2:3], 0x10abcdef12345678, s[4:5]
-// GFX1250: s_xor_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x8d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_xor_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x8d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_and_not1_b64 s[2:3], 0x10abcdef12345678, 0x10abcdef12345678
-// GFX1250: s_and_not1_b64 s[2:3], lit64(0x10abcdef12345678), lit64(0x10abcdef12345678) ; encoding: [0xfe,0xfe,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_and_not1_b64 s[2:3], 0x10abcdef12345678, 0x10abcdef12345678 ; encoding: [0xfe,0xfe,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_or_not1_b64 s[2:3], s[4:5], 0x10abcdef12345678
-// GFX1250: s_or_not1_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_or_not1_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_andn2_b64 s[2:3], 0x10abcdef12345678, s[4:5]
-// GFX1250: s_and_not1_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_and_not1_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_orn2_b64 s[2:3], s[4:5], 0x10abcdef12345678
-// GFX1250: s_or_not1_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_or_not1_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_nand_b64 s[2:3], s[4:5], 0x10abcdef12345678
-// GFX1250: s_nand_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8e,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_nand_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8e,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_nor_b64 s[2:3], s[4:5], 0x10abcdef12345678
-// GFX1250: s_nor_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_nor_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_xnor_b64 s[2:3], s[4:5], 0x10abcdef12345678
-// GFX1250: s_xnor_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_xnor_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_lshl_b64 s[2:3], 0x10abcdef12345678, s4
-// GFX1250: s_lshl_b64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x84,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_lshl_b64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x84,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_lshr_b64 s[2:3], 0x10abcdef12345678, s4
-// GFX1250: s_lshr_b64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x85,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_lshr_b64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x85,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_ashr_i64 s[2:3], 0x10abcdef12345678, s4
-// GFX1250: s_ashr_i64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x86,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_ashr_i64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x86,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_bfe_u64 s[2:3], 0x10abcdef12345678, 5
-// GFX1250: s_bfe_u64 s[2:3], lit64(0x10abcdef12345678), 5 ; encoding: [0xfe,0x85,0x02,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_bfe_u64 s[2:3], 0x10abcdef12345678, 5 ; encoding: [0xfe,0x85,0x02,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_bfe_i64 s[2:3], 0x80abcdef12345678, 5
-// GFX1250: s_bfe_i64 s[2:3], lit64(0x80abcdef12345678), 5 ; encoding: [0xfe,0x85,0x82,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x80]
+// GFX1250: s_bfe_i64 s[2:3], 0x80abcdef12345678, 5 ; encoding: [0xfe,0x85,0x82,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x80]
 
 s_cselect_b64 s[2:3], s[4:5], 0x10abcdef12345678
-// GFX1250: s_cselect_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x98,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: s_cselect_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x98,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 s_mov_b64 s[2:3], 0xffffffff01234567
-// GFX1250: s_mov_b64 s[2:3], lit64(0xffffffff01234567) ; encoding: [0xfe,0x01,0x82,0xbe,0x67,0x45,0x23,0x01,0xff,0xff,0xff,0xff]
+// GFX1250: s_mov_b64 s[2:3], 0xffffffff01234567    ; encoding: [0xfe,0x01,0x82,0xbe,0x67,0x45,0x23,0x01,0xff,0xff,0xff,0xff]
 
+// TODO: disasm
 s_mov_b64 s[2:3], lit64(0x777)
-// GFX1250: s_mov_b64 s[2:3], 0x777 ; encoding: [0xff,0x01,0x82,0xbe,0x77,0x07,0x00,0x00]
+// GFX1250-ASM: s_mov_b64 s[2:3], lit64(0x777)          ; encoding: [0xfe,0x01,0x82,0xbe,0x77,0x07,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX1250-DIS: s_mov_b64 s[2:3], 0x777                 ; encoding: [0xff,0x01,0x82,0xbe,0x77,0x07,0x00,0x00]
+
+s_mov_b64 s[2:3], 0x777
+// GFX1250: s_mov_b64 s[2:3], 0x777                     ; encoding: [0xff,0x01,0x82,0xbe,0x77,0x07,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s
index 5cf484f..cc351af 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s
@@ -1,61 +1,63 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
 // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
 
 s_add_pc_i64 s[2:3]
 // GFX1250: s_add_pc_i64 s[2:3]                     ; encoding: [0x02,0x4b,0x80,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 s_add_pc_i64 4
 // GFX1250: s_add_pc_i64 4                          ; encoding: [0x84,0x4b,0x80,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 s_add_pc_i64 100
 // GFX1250: s_add_pc_i64 0x64                       ; encoding: [0xff,0x4b,0x80,0xbe,0x64,0x00,0x00,0x00]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 s_add_pc_i64 0x12345678abcd0
-// GFX1250: s_add_pc_i64 lit64(0x12345678abcd0)            ; encoding: [0xfe,0x4b,0x80,0xbe,0xd0,0xbc,0x8a,0x67,0x45,0x23,0x01,0x00]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX1250: s_add_pc_i64 0x12345678abcd0            ; encoding: [0xfe,0x4b,0x80,0xbe,0xd0,0xbc,0x8a,0x67,0x45,0x23,0x01,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 s_get_pc_i64 s[2:3]
 // GFX1250: s_get_pc_i64 s[2:3]                     ; encoding: [0x00,0x47,0x82,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 s_getpc_b64 s[2:3]
 // GFX1250: s_get_pc_i64 s[2:3]                     ; encoding: [0x00,0x47,0x82,0xbe]
 
 s_set_pc_i64 s[2:3]
 // GFX1250: s_set_pc_i64 s[2:3]                     ; encoding: [0x02,0x48,0x80,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 s_setpc_b64 s[2:3]
 // GFX1250: s_set_pc_i64 s[2:3]                     ; encoding: [0x02,0x48,0x80,0xbe]
 
 s_swap_pc_i64 s[2:3], 10
 // GFX1250: s_swap_pc_i64 s[2:3], 10                ; encoding: [0x8a,0x49,0x82,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 s_swappc_b64 s[2:3], 10
 // GFX1250: s_swap_pc_i64 s[2:3], 10                ; encoding: [0x8a,0x49,0x82,0xbe]
 
 s_rfe_i64 s[2:3]
 // GFX1250: s_rfe_i64 s[2:3]                        ; encoding: [0x02,0x4a,0x80,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 s_rfe_b64 s[2:3]
 // GFX1250: s_rfe_i64 s[2:3]                        ; encoding: [0x02,0x4a,0x80,0xbe]
 
 s_sendmsg_rtn_b32 s2, sendmsg(MSG_RTN_GET_CLUSTER_BARRIER_STATE)
 // GFX1250: s_sendmsg_rtn_b32 s2, sendmsg(MSG_RTN_GET_CLUSTER_BARRIER_STATE) ; encoding: [0x88,0x4c,0x82,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: specified message id is not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:31: error: specified message id is not supported on this GPU
 
 s_sendmsg_rtn_b64 s[2:3], sendmsg(MSG_RTN_GET_CLUSTER_BARRIER_STATE)
 // GFX1250: s_sendmsg_rtn_b64 s[2:3], sendmsg(MSG_RTN_GET_CLUSTER_BARRIER_STATE) ; encoding: [0x88,0x4d,0x82,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: specified message id is not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:35: error: specified message id is not supported on this GPU
 
 s_get_shader_cycles_u64 s[2:3]
 // GFX1250: s_get_shader_cycles_u64 s[2:3]          ; encoding: [0x00,0x06,0x82,0xbe]
-// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 s_barrier_signal -3
 // GFX1250: s_barrier_signal -3                     ; encoding: [0xc3,0x4e,0x80,0xbe]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_valu_lit64.s b/llvm/test/MC/AMDGPU/gfx1250_asm_valu_lit64.s
index 7395a51..58da119 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_valu_lit64.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_valu_lit64.s
@@ -1,211 +1,213 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
 // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s
 
 v_ceil_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x30,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_ceil_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x30,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cvt_f32_f64 v255, 0x10abcdef12345678
-// GFX1250: v_cvt_f32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x1e,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cvt_f32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x1e,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cvt_i32_f64 v255, 0x10abcdef12345678
-// GFX1250: v_cvt_i32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x06,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cvt_i32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x06,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cvt_u32_f64 v255, 0x10abcdef12345678
-// GFX1250: v_cvt_u32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x2a,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cvt_u32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x2a,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_floor_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_floor_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x34,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_floor_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x34,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_fract_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_fract_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x7c,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_fract_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x7c,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_frexp_exp_i32_f64 v255, 0x10abcdef12345678
-// GFX1250: v_frexp_exp_i32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x78,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_frexp_exp_i32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x78,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_frexp_mant_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_frexp_mant_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x7a,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_frexp_mant_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x7a,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_rcp_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_rcp_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x5e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_rcp_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x5e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_rndne_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_rndne_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x32,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_rndne_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x32,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_rsq_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_rsq_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x62,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_rsq_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x62,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_sqrt_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_sqrt_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x68,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_sqrt_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x68,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_trunc_f64 v[254:255], 0x10abcdef12345678
-// GFX1250: v_trunc_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x2e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_trunc_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x2e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_add_f64 v[254:255], 0x10abcdef12345678, v[254:255]
-// GFX1250: v_add_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x05,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_add_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x05,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_max_num_f64 v[254:255], 0x10abcdef12345678, v[254:255]
-// GFX1250: v_max_num_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_max_num_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_min_num_f64 v[254:255], 0x10abcdef12345678, v[254:255]
-// GFX1250: v_min_num_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_min_num_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_mul_f64 v[254:255], 0x10abcdef12345678, v[254:255]
-// GFX1250: v_mul_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x0d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_mul_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x0d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_class_f64 vcc_lo, 0x10abcdef12345678, v255
-// GFX1250: v_cmp_class_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v255 ; encoding: [0xfe,0xfe,0xff,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_class_f64_e32 vcc_lo, 0x10abcdef12345678, v255 ; encoding: [0xfe,0xfe,0xff,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_eq_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_eq_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_eq_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_ge_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_ge_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_ge_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_gt_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_gt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_gt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_gt_i64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_gt_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_gt_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_gt_u64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_gt_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_gt_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_le_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_le_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_le_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_le_i64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_le_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_le_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_le_u64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_le_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_le_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_lg_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_lg_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_lg_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_lt_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_lt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_lt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_lt_i64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_lt_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_lt_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_lt_u64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_lt_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_lt_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_ne_i64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_ne_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_ne_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_ne_u64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_ne_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_ne_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_neq_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_neq_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_neq_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_nge_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_nge_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_nge_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_ngt_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_ngt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_ngt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_nle_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_nle_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_nle_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_nlg_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_nlg_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_nlg_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_nlt_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_nlt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_nlt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_o_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_o_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_o_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmp_u_f64 vcc_lo, 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmp_u_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmp_u_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_class_f64 0x10abcdef12345678, v255
-// GFX1250: v_cmpx_class_f64_e32 lit64(0x10abcdef12345678), v255 ; encoding: [0xfe,0xfe,0xff,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_class_f64_e32 0x10abcdef12345678, v255 ; encoding: [0xfe,0xfe,0xff,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_eq_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_eq_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_eq_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_eq_i64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_eq_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_eq_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_eq_u64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_eq_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_eq_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_ge_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_ge_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_ge_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_ge_i64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_ge_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xad,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_ge_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xad,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_ge_u64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_ge_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbd,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_ge_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbd,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_gt_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_gt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_gt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_gt_i64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_gt_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_gt_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_gt_u64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_gt_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_gt_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_le_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_le_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_le_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_le_i64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_le_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_le_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_le_u64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_le_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_le_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_lg_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_lg_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_lg_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_lt_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_lt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_lt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_lt_i64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_lt_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_lt_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_lt_u64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_lt_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_lt_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_ne_i64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_ne_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_ne_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_ne_u64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_ne_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_ne_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_neq_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_neq_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_neq_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_nge_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_nge_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_nge_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_ngt_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_ngt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_ngt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_nle_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_nle_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_nle_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_nlg_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_nlg_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_nlg_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_nlt_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_nlt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_nlt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_o_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_o_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_o_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_cmpx_u_f64 0x10abcdef12345678, v[254:255]
-// GFX1250: v_cmpx_u_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+// GFX1250: v_cmpx_u_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 v_ceil_f64 v[254:255], 153.1
-// GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x4063233333333333) ; encoding: [0xfe,0x30,0xfc,0x7f,0x33,0x33,0x33,0x33,0x33,0x23,0x63,0x40]
+// GFX1250: v_ceil_f64_e32 v[254:255], 0x4063233333333333 ; encoding: [0xfe,0x30,0xfc,0x7f,0x33,0x33,0x33,0x33,0x33,0x23,0x63,0x40]
 
 v_ceil_f64 v[254:255], 1.5e22
-// GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x448969368974c05b) ; encoding: [0xfe,0x30,0xfc,0x7f,0x5b,0xc0,0x74,0x89,0x36,0x69,0x89,0x44]
+// GFX1250: v_ceil_f64_e32 v[254:255], 0x448969368974c05b ; encoding: [0xfe,0x30,0xfc,0x7f,0x5b,0xc0,0x74,0x89,0x36,0x69,0x89,0x44]
 
 // These 64-bit literals can be represented as 32-bit with encoding 255. HW behavior:
 // 64 bit float: the lower 32-bit are padded with zero
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s
index 2a761d9..16cec8b 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s
@@ -1,7 +1,7 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
 
 global_load_b96 v[1:3], v[0:1], off
-// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 flat_load_b32 v5, v[2:3] scale_offset
 // GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index 811c6eb..6950c72 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -26,7 +26,7 @@ v_mov_b64 v[4:5], 0.5
 // GFX1250: v_mov_b64_e32 v[4:5], 0.5               ; encoding: [0xf0,0x3a,0x08,0x7e]
 
 v_mov_b64 v[254:255], 0xaf123456
-// GFX1250: v_mov_b64_e32 v[254:255], lit64(0xaf123456) ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: v_mov_b64_e32 v[254:255], 0xaf123456    ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 v_tanh_f32 v5, v1
 // GFX1250: v_tanh_f32_e32 v5, v1                   ; encoding: [0x01,0x3d,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 40fcd6f..0d61c1f 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -1,5 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s
 
 v_mov_b64_e32 v[4:5], v[2:3]
 // GFX1250: v_mov_b64_e32 v[4:5], v[2:3]            ; encoding: [0x02,0x3b,0x08,0x7e]
@@ -26,7 +27,7 @@ v_mov_b64 v[4:5], 0.5
 // GFX1250: v_mov_b64_e32 v[4:5], 0.5               ; encoding: [0xf0,0x3a,0x08,0x7e]
 
 v_mov_b64 v[254:255], 0xaf123456
-// GFX1250: v_mov_b64_e32 v[254:255], lit64(0xaf123456) ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: v_mov_b64_e32 v[254:255], 0xaf123456    ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 v_tanh_f32 v5, v1
 // GFX1250: v_tanh_f32_e32 v5, v1                   ; encoding: [0x01,0x3d,0x0a,0x7e]
@@ -628,8 +629,8 @@ v_cvt_f16_fp8 v1.l, 0x1234
 v_cvt_f16_fp8 v1.h, v2
 // GFX1250: v_cvt_f16_fp8_e32 v1.h, v2              ; encoding: [0x02,0xef,0x02,0x7f]
 
-v_cvt_pk_f16_bf8 v1, v2
-// GFX1250: v_cvt_pk_f16_bf8 v1, v2                 ; encoding: [0x02,0xed,0x02,0x7e]
+v_cvt_pk_f16_bf8 v1, v2.l
+// GFX1250: v_cvt_pk_f16_bf8 v1, v2.l               ; encoding: [0x02,0xed,0x02,0x7e]
 
 v_cvt_pk_f16_bf8 v1, s2
 // GFX1250: v_cvt_pk_f16_bf8 v1, s2                 ; encoding: [0x02,0xec,0x02,0x7e]
@@ -637,8 +638,8 @@ v_cvt_pk_f16_bf8 v1, s2
 v_cvt_pk_f16_bf8 v1, 100
 // GFX1250: v_cvt_pk_f16_bf8 v1, 0x64               ; encoding: [0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00]
 
-v_cvt_pk_f16_fp8 v1, v2
-// GFX1250: v_cvt_pk_f16_fp8 v1, v2                 ; encoding: [0x02,0xeb,0x02,0x7e]
+v_cvt_pk_f16_fp8 v1, v2.l
+// GFX1250: v_cvt_pk_f16_fp8 v1, v2.l               ; encoding: [0x02,0xeb,0x02,0x7e]
 
 v_cvt_pk_f16_fp8 v1, s2
 // GFX1250: v_cvt_pk_f16_fp8 v1, s2                 ; encoding: [0x02,0xea,0x02,0x7e]
@@ -694,8 +695,8 @@ v_cvt_pk_f32_fp8_e32 v[2:3], s3
 v_cvt_pk_f32_fp8_e32 v[2:3], 3
 // GFX1250: v_cvt_pk_f32_fp8_e32 v[2:3], 3          ; encoding: [0x83,0xdc,0x04,0x7e]
 
-v_cvt_pk_f32_fp8_e32 v[2:3], v3
-// GFX1250: v_cvt_pk_f32_fp8_e32 v[2:3], v3         ; encoding: [0x03,0xdd,0x04,0x7e]
+v_cvt_pk_f32_fp8_e32 v[2:3], v3.l
+// GFX1250: v_cvt_pk_f32_fp8_e32 v[2:3], v3.l       ; encoding: [0x03,0xdd,0x04,0x7e]
 
 v_cvt_pk_f32_fp8_e32 v[4:5], v127.h
 // GFX1250: v_cvt_pk_f32_fp8_e32 v[4:5], v127.h     ; encoding: [0xff,0xdd,0x08,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s
index 0a1d3bf..02872b0 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s
@@ -1,5 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX1250,GFX1250-ASM %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250,GFX1250-DIS %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200-ERR --implicit-check-not=error: %s
 
 v_fmac_f64 v[4:5], v[2:3], v[4:5]
@@ -195,7 +196,7 @@ v_add_nc_u64 v[4:5], -4.0, v[4:5]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_add_nc_u64 v[4:5], 0xaf123456, v[4:5]
-// GFX1250: v_add_nc_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x50,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: v_add_nc_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x50,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_add_nc_u64 v[4:5], 0x3f717273, v[4:5]
@@ -315,7 +316,7 @@ v_sub_nc_u64 v[4:5], -4.0, v[4:5]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_sub_nc_u64 v[4:5], 0xaf123456, v[4:5]
-// GFX1250: v_sub_nc_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x52,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: v_sub_nc_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x52,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_sub_nc_u64 v[4:5], 0x3f717273, v[4:5]
@@ -435,7 +436,7 @@ v_mul_u64 v[4:5], -4.0, v[4:5]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_mul_u64 v[4:5], 0xaf123456, v[4:5]
-// GFX1250: v_mul_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x54,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: v_mul_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x54,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_mul_u64 v[4:5], 0x3f717273, v[4:5]
@@ -519,7 +520,7 @@ v_fmamk_f64 v[6:7], v[254:255], 0x405ec00000000000, v[2:3]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_fmamk_f64 v[6:7], s[2:3], 0x405ec00012345678, v[2:3]
-// GFX1250: v_fmamk_f64 v[6:7], s[2:3], lit64(0x405ec00012345678), v[2:3] ; encoding: [0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+// GFX1250: v_fmamk_f64 v[6:7], s[2:3], 0x405ec00012345678, v[2:3] ; encoding: [0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_fmamk_f64 v[6:7], vcc, 0x405ec000, v[2:3]
@@ -551,7 +552,7 @@ v_fmamk_f64 v[254:255], 0x405ec000, 0x405ec000, v[254:255]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_fmamk_f64 v[254:255], 0x405ec00012345678, 0x405ec00012345678, v[254:255]
-// GFX1250: v_fmamk_f64 v[254:255], lit64(0x405ec00012345678), lit64(0x405ec00012345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+// GFX1250: v_fmamk_f64 v[254:255], 0x405ec00012345678, 0x405ec00012345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_fmamk_f64 v[254:255], 123.0, 0x405ec000, v[2:3]
@@ -559,15 +560,15 @@ v_fmamk_f64 v[254:255], 123.0, 0x405ec000, v[2:3]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_fmamk_f64 v[4:5], v[2:3], 123.1, v[6:7]
-// GFX1250: v_fmamk_f64 v[4:5], v[2:3], lit64(0x405ec66666666666), v[6:7] ; encoding: [0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+// GFX1250: v_fmamk_f64 v[4:5], v[2:3], 0x405ec66666666666, v[6:7] ; encoding: [0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_fmamk_f64 v[4:5], 0x405ec66666666666, 123.1, v[6:7]
-// GFX1250: v_fmamk_f64 v[4:5], lit64(0x405ec66666666666), lit64(0x405ec66666666666), v[6:7] ; encoding: [0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+// GFX1250: v_fmamk_f64 v[4:5], 0x405ec66666666666, 0x405ec66666666666, v[6:7] ; encoding: [0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_fmamk_f64 v[4:5], 123.1, 123.1, v[8:9]
-// GFX1250: v_fmamk_f64 v[4:5], lit64(0x405ec66666666666), lit64(0x405ec66666666666), v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+// GFX1250: v_fmamk_f64 v[4:5], 0x405ec66666666666, 0x405ec66666666666, v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_fmamk_f64 v[4:5], 1.0, 1.0, v[6:7]
@@ -595,7 +596,7 @@ v_fmaak_f64 v[6:7], v[254:255], v[8:9], 0x405ec00000000000
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_fmaak_f64 v[6:7], s[2:3], v[8:9], 0x405ec00012345678
-// GFX1250: v_fmaak_f64 v[6:7], s[2:3], v[8:9], lit64(0x405ec00012345678) ; encoding: [0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+// GFX1250: v_fmaak_f64 v[6:7], s[2:3], v[8:9], 0x405ec00012345678 ; encoding: [0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_fmaak_f64 v[6:7], vcc, v[8:9], 0x405ec000
@@ -631,27 +632,28 @@ v_fmaak_f64 v[254:255], 0x405ec00000000000, v[254:255], 0x405ec00000000000
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_fmaak_f64 v[254:255], 0x405ec00012345678, v[254:255], 0x405ec00012345678
-// GFX1250: v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit64(0x405ec00012345678) ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+// GFX1250: v_fmaak_f64 v[254:255], 0x405ec00012345678, v[254:255], 0x405ec00012345678 ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit(0x405ec00012345678)
-// GFX1250: v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit64(0x405ec00012345678) ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
-// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250-ASM: v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit(0x405ec00012345678) ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+// GFX1250-DIS: v_fmaak_f64 v[254:255], 0x405ec00012345678, v[254:255], 0x405ec00012345678 ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+// GFX1200-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU
 
 v_fmaak_f64 v[254:255], 123.0, v[2:3], 0x405ec000
 // GFX1250: v_fmaak_f64 v[254:255], 0x405ec000, v[2:3], 0x405ec000 ; encoding: [0xfe,0x04,0xfc,0x49,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_fmaak_f64 v[4:5], v[2:3], v[2:3], 123.1
-// GFX1250: v_fmaak_f64 v[4:5], v[2:3], v[2:3], lit64(0x405ec66666666666) ; encoding: [0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+// GFX1250: v_fmaak_f64 v[4:5], v[2:3], v[2:3], 0x405ec66666666666 ; encoding: [0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_fmaak_f64 v[4:5], 0x405ec66666666666, v[6:7], 123.1
-// GFX1250: v_fmaak_f64 v[4:5], lit64(0x405ec66666666666), v[6:7], lit64(0x405ec66666666666) ; encoding: [0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+// GFX1250: v_fmaak_f64 v[4:5], 0x405ec66666666666, v[6:7], 0x405ec66666666666 ; encoding: [0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_fmaak_f64 v[4:5], 123.1, v[8:9], 123.1
-// GFX1250: v_fmaak_f64 v[4:5], lit64(0x405ec66666666666), v[8:9], lit64(0x405ec66666666666) ; encoding: [0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+// GFX1250: v_fmaak_f64 v[4:5], 0x405ec66666666666, v[8:9], 0x405ec66666666666 ; encoding: [0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_fmaak_f64 v[4:5], 1.0, v[8:9], 1.0
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2_err.s
index 9f50361..a83d84f 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2_err.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2_err.s
@@ -1,7 +1,7 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s
 
 v_add_f64 v[1:2], v[1:2], v[1:2]
-// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
+// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 v_fmaak_f32 v4, v2, v6, 3 row_share:1
 // GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
index 949847e..ad5771b 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
@@ -1,6 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
 // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefixes=GFX12,GFX1200 %s
 // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefixes=GFX12,GFX1250 %s
+// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s
 
 s_alloc_vgpr 0x1235
 // GFX12: s_alloc_vgpr 0x1235                     ; encoding: [0xff,0x53,0x80,0xbe,0x35,0x12,0x00,0x00]
@@ -859,7 +860,7 @@ s_mov_b64 s[0:1], 0x3f717273
 
 s_mov_b64 s[0:1], 0xaf123456
 // GFX1200: s_mov_b64 s[0:1], 0xaf123456            ; encoding: [0xff,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_mov_b64 s[0:1], lit64(0xaf123456)     ; encoding: [0xfe,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_mov_b64 s[0:1], 0xaf123456            ; encoding: [0xfe,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_mov_b64 s[0:1], null
 // GFX12: s_mov_b64 s[0:1], null                  ; encoding: [0x7c,0x01,0x80,0xbe]
@@ -968,7 +969,7 @@ s_cmov_b64 s[0:1], 0x3f717273
 
 s_cmov_b64 s[0:1], 0xaf123456
 // GFX1200: s_cmov_b64 s[0:1], 0xaf123456           ; encoding: [0xff,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_cmov_b64 s[0:1], lit64(0xaf123456)    ; encoding: [0xfe,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_cmov_b64 s[0:1], 0xaf123456           ; encoding: [0xfe,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_not_b32 s0, s1
 // GFX12: s_not_b32 s0, s1                        ; encoding: [0x01,0x1e,0x80,0xbe]
@@ -1071,7 +1072,7 @@ s_not_b64 s[0:1], 0x3f717273
 
 s_not_b64 s[0:1], 0xaf123456
 // GFX1200: s_not_b64 s[0:1], 0xaf123456            ; encoding: [0xff,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_not_b64 s[0:1], lit64(0xaf123456)     ; encoding: [0xfe,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_not_b64 s[0:1], 0xaf123456            ; encoding: [0xfe,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_wqm_b32 s0, s1
 // GFX12: s_wqm_b32 s0, s1                        ; encoding: [0x01,0x1c,0x80,0xbe]
@@ -1174,7 +1175,7 @@ s_wqm_b64 s[0:1], 0x3f717273
 
 s_wqm_b64 s[0:1], 0xaf123456
 // GFX1200: s_wqm_b64 s[0:1], 0xaf123456            ; encoding: [0xff,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_wqm_b64 s[0:1], lit64(0xaf123456)     ; encoding: [0xfe,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_wqm_b64 s[0:1], 0xaf123456            ; encoding: [0xfe,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_brev_b32 s0, s1
 // GFX12: s_brev_b32 s0, s1                       ; encoding: [0x01,0x04,0x80,0xbe]
@@ -1277,7 +1278,7 @@ s_brev_b64 s[0:1], 0x3f717273
 
 s_brev_b64 s[0:1], 0xaf123456
 // GFX1200: s_brev_b64 s[0:1], 0xaf123456           ; encoding: [0xff,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_brev_b64 s[0:1], lit64(0xaf123456)    ; encoding: [0xfe,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_brev_b64 s[0:1], 0xaf123456           ; encoding: [0xfe,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_bcnt0_i32_b32 s0, s1
 // GFX12: s_bcnt0_i32_b32 s0, s1                  ; encoding: [0x01,0x16,0x80,0xbe]
@@ -1389,7 +1390,7 @@ s_bcnt0_i32_b64 s0, 0x3f717273
 
 s_bcnt0_i32_b64 s0, 0xaf123456
 // GFX1200: s_bcnt0_i32_b64 s0, 0xaf123456          ; encoding: [0xff,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_bcnt0_i32_b64 s0, lit64(0xaf123456)   ; encoding: [0xfe,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_bcnt0_i32_b64 s0, 0xaf123456          ; encoding: [0xfe,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_bcnt1_i32_b32 s0, s1
 // GFX12: s_bcnt1_i32_b32 s0, s1                  ; encoding: [0x01,0x18,0x80,0xbe]
@@ -1501,7 +1502,7 @@ s_bcnt1_i32_b64 s0, 0x3f717273
 
 s_bcnt1_i32_b64 s0, 0xaf123456
 // GFX1200: s_bcnt1_i32_b64 s0, 0xaf123456          ; encoding: [0xff,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_bcnt1_i32_b64 s0, lit64(0xaf123456)   ; encoding: [0xfe,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_bcnt1_i32_b64 s0, 0xaf123456          ; encoding: [0xfe,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_ff1_i32_b32 s0, s1
 // GFX12: s_ctz_i32_b32 s0, s1                    ; encoding: [0x01,0x08,0x80,0xbe]
@@ -1613,7 +1614,7 @@ s_ff1_i32_b64 s0, 0x3f717273
 
 s_ff1_i32_b64 s0, 0xaf123456
 // GFX1200: s_ctz_i32_b64 s0, 0xaf123456            ; encoding: [0xff,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_ctz_i32_b64 s0, lit64(0xaf123456)     ; encoding: [0xfe,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_ctz_i32_b64 s0, 0xaf123456            ; encoding: [0xfe,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_flbit_i32_b32 s0, s1
 // GFX12: s_clz_i32_u32 s0, s1                    ; encoding: [0x01,0x0a,0x80,0xbe]
@@ -1725,7 +1726,7 @@ s_flbit_i32_b64 s0, 0x3f717273
 
 s_flbit_i32_b64 s0, 0xaf123456
 // GFX1200: s_clz_i32_u64 s0, 0xaf123456            ; encoding: [0xff,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_clz_i32_u64 s0, lit64(0xaf123456)     ; encoding: [0xfe,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_clz_i32_u64 s0, 0xaf123456            ; encoding: [0xfe,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_flbit_i32 s0, s1
 // GFX12: s_cls_i32 s0, s1                        ; encoding: [0x01,0x0c,0x80,0xbe]
@@ -1837,7 +1838,7 @@ s_flbit_i32_i64 s0, 0x3f717273
 
 s_flbit_i32_i64 s0, 0xaf123456
 // GFX1200: s_cls_i32_i64 s0, 0xaf123456            ; encoding: [0xff,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_cls_i32_i64 s0, lit64(0xaf123456)     ; encoding: [0xfe,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_cls_i32_i64 s0, 0xaf123456            ; encoding: [0xfe,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_sext_i32_i8 s0, s1
 // GFX12: s_sext_i32_i8 s0, s1                    ; encoding: [0x01,0x0e,0x80,0xbe]
@@ -2283,7 +2284,7 @@ s_and_saveexec_b64 s[0:1], 0x3f717273
 
 s_and_saveexec_b64 s[0:1], 0xaf123456
 // GFX1200: s_and_saveexec_b64 s[0:1], 0xaf123456   ; encoding: [0xff,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_saveexec_b64 s[0:1], 0xaf123456   ; encoding: [0xfe,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_or_saveexec_b64 s[0:1], s[2:3]
 // GFX12: s_or_saveexec_b64 s[0:1], s[2:3]        ; encoding: [0x02,0x23,0x80,0xbe]
@@ -2323,7 +2324,7 @@ s_or_saveexec_b64 s[0:1], 0x3f717273
 
 s_or_saveexec_b64 s[0:1], 0xaf123456
 // GFX1200: s_or_saveexec_b64 s[0:1], 0xaf123456    ; encoding: [0xff,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_saveexec_b64 s[0:1], 0xaf123456    ; encoding: [0xfe,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_xor_saveexec_b64 s[0:1], s[2:3]
 // GFX12: s_xor_saveexec_b64 s[0:1], s[2:3]       ; encoding: [0x02,0x25,0x80,0xbe]
@@ -2363,7 +2364,7 @@ s_xor_saveexec_b64 s[0:1], 0x3f717273
 
 s_xor_saveexec_b64 s[0:1], 0xaf123456
 // GFX1200: s_xor_saveexec_b64 s[0:1], 0xaf123456   ; encoding: [0xff,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_xor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_xor_saveexec_b64 s[0:1], 0xaf123456   ; encoding: [0xfe,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_andn2_saveexec_b64 s[0:1], s[2:3]
 // GFX12: s_and_not1_saveexec_b64 s[0:1], s[2:3]  ; encoding: [0x02,0x31,0x80,0xbe]
@@ -2403,7 +2404,7 @@ s_andn2_saveexec_b64 s[0:1], 0x3f717273
 
 s_andn2_saveexec_b64 s[0:1], 0xaf123456
 // GFX1200: s_and_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not1_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_orn2_saveexec_b64 s[0:1], s[2:3]
 // GFX12: s_or_not1_saveexec_b64 s[0:1], s[2:3]   ; encoding: [0x02,0x33,0x80,0xbe]
@@ -2443,7 +2444,7 @@ s_orn2_saveexec_b64 s[0:1], 0x3f717273
 
 s_orn2_saveexec_b64 s[0:1], 0xaf123456
 // GFX1200: s_or_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_not1_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_nand_saveexec_b64 s[0:1], s[2:3]
 // GFX12: s_nand_saveexec_b64 s[0:1], s[2:3]      ; encoding: [0x02,0x27,0x80,0xbe]
@@ -2483,7 +2484,7 @@ s_nand_saveexec_b64 s[0:1], 0x3f717273
 
 s_nand_saveexec_b64 s[0:1], 0xaf123456
 // GFX1200: s_nand_saveexec_b64 s[0:1], 0xaf123456  ; encoding: [0xff,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_nand_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_nand_saveexec_b64 s[0:1], 0xaf123456  ; encoding: [0xfe,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_nor_saveexec_b64 s[0:1], s[2:3]
 // GFX12: s_nor_saveexec_b64 s[0:1], s[2:3]       ; encoding: [0x02,0x29,0x80,0xbe]
@@ -2523,7 +2524,7 @@ s_nor_saveexec_b64 s[0:1], 0x3f717273
 
 s_nor_saveexec_b64 s[0:1], 0xaf123456
 // GFX1200: s_nor_saveexec_b64 s[0:1], 0xaf123456   ; encoding: [0xff,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_nor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_nor_saveexec_b64 s[0:1], 0xaf123456   ; encoding: [0xfe,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_xnor_saveexec_b64 s[0:1], s[2:3]
 // GFX12: s_xnor_saveexec_b64 s[0:1], s[2:3]      ; encoding: [0x02,0x2b,0x80,0xbe]
@@ -2563,7 +2564,7 @@ s_xnor_saveexec_b64 s[0:1], 0x3f717273
 
 s_xnor_saveexec_b64 s[0:1], 0xaf123456
 // GFX1200: s_xnor_saveexec_b64 s[0:1], 0xaf123456  ; encoding: [0xff,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_xnor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_xnor_saveexec_b64 s[0:1], 0xaf123456  ; encoding: [0xfe,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_quadmask_b32 s0, s1
 // GFX12: s_quadmask_b32 s0, s1                   ; encoding: [0x01,0x1a,0x80,0xbe]
@@ -2666,7 +2667,7 @@ s_quadmask_b64 s[0:1], 0x3f717273
 
 s_quadmask_b64 s[0:1], 0xaf123456
 // GFX1200: s_quadmask_b64 s[0:1], 0xaf123456       ; encoding: [0xff,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_quadmask_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_quadmask_b64 s[0:1], 0xaf123456       ; encoding: [0xfe,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_movrels_b32 s0, s1
 // GFX12: s_movrels_b32 s0, s1                    ; encoding: [0x01,0x40,0x80,0xbe]
@@ -2811,7 +2812,7 @@ s_movreld_b64 s[0:1], 0x3f717273
 
 s_movreld_b64 s[0:1], 0xaf123456
 // GFX1200: s_movreld_b64 s[0:1], 0xaf123456        ; encoding: [0xff,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_movreld_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_movreld_b64 s[0:1], 0xaf123456        ; encoding: [0xfe,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_abs_i32 s0, s1
 // GFX12: s_abs_i32 s0, s1                        ; encoding: [0x01,0x15,0x80,0xbe]
@@ -2911,7 +2912,7 @@ s_andn1_saveexec_b64 s[0:1], 0x3f717273
 
 s_andn1_saveexec_b64 s[0:1], 0xaf123456
 // GFX1200: s_and_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not0_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_orn1_saveexec_b64 s[0:1], s[2:3]
 // GFX12: s_or_not0_saveexec_b64 s[0:1], s[2:3]   ; encoding: [0x02,0x2f,0x80,0xbe]
@@ -2951,7 +2952,7 @@ s_orn1_saveexec_b64 s[0:1], 0x3f717273
 
 s_orn1_saveexec_b64 s[0:1], 0xaf123456
 // GFX1200: s_or_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_not0_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_andn1_wrexec_b64 s[0:1], s[2:3]
 // GFX12: s_and_not0_wrexec_b64 s[0:1], s[2:3]    ; encoding: [0x02,0x35,0x80,0xbe]
@@ -2991,7 +2992,7 @@ s_andn1_wrexec_b64 s[0:1], 0x3f717273
 
 s_andn1_wrexec_b64 s[0:1], 0xaf123456
 // GFX1200: s_and_not0_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not0_wrexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not0_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_andn2_wrexec_b64 s[0:1], s[2:3]
 // GFX12: s_and_not1_wrexec_b64 s[0:1], s[2:3]    ; encoding: [0x02,0x37,0x80,0xbe]
@@ -3031,7 +3032,7 @@ s_andn2_wrexec_b64 s[0:1], 0x3f717273
 
 s_andn2_wrexec_b64 s[0:1], 0xaf123456
 // GFX1200: s_and_not1_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not1_wrexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not1_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_bitreplicate_b64_b32 s[0:1], s2
 // GFX12: s_bitreplicate_b64_b32 s[0:1], s2       ; encoding: [0x02,0x14,0x80,0xbe]
@@ -3830,7 +3831,7 @@ s_ctz_i32_b64 exec_hi, src_scc
 
 s_ctz_i32_b64 null, 0xaf123456
 // GFX1200: s_ctz_i32_b64 null, 0xaf123456          ; encoding: [0xff,0x09,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_ctz_i32_b64 null, lit64(0xaf123456)   ; encoding: [0xfe,0x09,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_ctz_i32_b64 null, 0xaf123456          ; encoding: [0xfe,0x09,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_and_not1_saveexec_b64 s[10:11], s[2:3]
 // GFX12: s_and_not1_saveexec_b64 s[10:11], s[2:3] ; encoding: [0x02,0x31,0x8a,0xbe]
@@ -3858,7 +3859,7 @@ s_and_not1_saveexec_b64 ttmp[14:15], src_scc
 
 s_and_not1_saveexec_b64 null, 0xaf123456
 // GFX1200: s_and_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xff,0x31,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not1_saveexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x31,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x31,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_and_not0_saveexec_b32 s5, s1
 // GFX12: s_and_not0_saveexec_b32 s5, s1          ; encoding: [0x01,0x2c,0x85,0xbe]
@@ -3919,7 +3920,7 @@ s_and_not0_saveexec_b64 ttmp[14:15], src_scc
 
 s_and_not0_saveexec_b64 null, 0xaf123456
 // GFX1200: s_and_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xff,0x2d,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not0_saveexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x2d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x2d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_and_not0_wrexec_b32 s5, s1
 // GFX12: s_and_not0_wrexec_b32 s5, s1            ; encoding: [0x01,0x34,0x85,0xbe]
@@ -3980,7 +3981,7 @@ s_and_not0_wrexec_b64 ttmp[14:15], src_scc
 
 s_and_not0_wrexec_b64 null, 0xaf123456
 // GFX1200: s_and_not0_wrexec_b64 null, 0xaf123456  ; encoding: [0xff,0x35,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not0_wrexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x35,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not0_wrexec_b64 null, 0xaf123456  ; encoding: [0xfe,0x35,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_and_not1_saveexec_b32 s5, s1
 // GFX12: s_and_not1_saveexec_b32 s5, s1          ; encoding: [0x01,0x30,0x85,0xbe]
@@ -4074,7 +4075,7 @@ s_and_not1_wrexec_b64 ttmp[14:15], src_scc
 
 s_and_not1_wrexec_b64 null, 0xaf123456
 // GFX1200: s_and_not1_wrexec_b64 null, 0xaf123456  ; encoding: [0xff,0x37,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not1_wrexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x37,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not1_wrexec_b64 null, 0xaf123456  ; encoding: [0xfe,0x37,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_cls_i32 s5, s1
 // GFX12: s_cls_i32 s5, s1                        ; encoding: [0x01,0x0c,0x85,0xbe]
@@ -4144,7 +4145,7 @@ s_cls_i32_i64 exec_hi, src_scc
 
 s_cls_i32_i64 null, 0xaf123456
 // GFX1200: s_cls_i32_i64 null, 0xaf123456          ; encoding: [0xff,0x0d,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_cls_i32_i64 null, lit64(0xaf123456)   ; encoding: [0xfe,0x0d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_cls_i32_i64 null, 0xaf123456          ; encoding: [0xfe,0x0d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_clz_i32_u32 s5, s1
 // GFX12: s_clz_i32_u32 s5, s1                    ; encoding: [0x01,0x0a,0x85,0xbe]
@@ -4214,7 +4215,7 @@ s_clz_i32_u64 exec_hi, src_scc
 
 s_clz_i32_u64 null, 0xaf123456
 // GFX1200: s_clz_i32_u64 null, 0xaf123456          ; encoding: [0xff,0x0b,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_clz_i32_u64 null, lit64(0xaf123456)   ; encoding: [0xfe,0x0b,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_clz_i32_u64 null, 0xaf123456          ; encoding: [0xfe,0x0b,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_or_not0_saveexec_b32 s5, s1
 // GFX12: s_or_not0_saveexec_b32 s5, s1           ; encoding: [0x01,0x2e,0x85,0xbe]
@@ -4275,7 +4276,7 @@ s_or_not0_saveexec_b64 ttmp[14:15], src_scc
 
 s_or_not0_saveexec_b64 null, 0xaf123456
 // GFX1200: s_or_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xff,0x2f,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_not0_saveexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x2f,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x2f,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_or_not1_saveexec_b32 s5, s1
 // GFX12: s_or_not1_saveexec_b32 s5, s1           ; encoding: [0x01,0x32,0x85,0xbe]
@@ -4336,4 +4337,4 @@ s_or_not1_saveexec_b64 ttmp[14:15], src_scc
 
 s_or_not1_saveexec_b64 null, 0xaf123456
 // GFX1200: s_or_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xff,0x33,0xfc,0xbe,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_not1_saveexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x33,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x33,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s
index 2ecec4c..9c83879 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s
@@ -1,6 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
 // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefixes=GFX12,GFX1200 %s
 // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefixes=GFX12,GFX1250 %s
+// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s
 
 s_add_nc_u64 s[0:1], s[2:3], s[4:5]
 // GFX12: s_add_nc_u64 s[0:1], s[2:3], s[4:5]     ; encoding: [0x02,0x04,0x80,0xa9]
@@ -55,7 +56,7 @@ s_add_nc_u64 s[0:1], 0x3f717273, s[2:3]
 
 s_add_nc_u64 s[0:1], 0xaf123456, s[2:3]
 // GFX1200: s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf]
-// GFX1250: s_add_nc_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_add_nc_u64 s[0:1], s[2:3], exec
 // GFX12: s_add_nc_u64 s[0:1], s[2:3], exec       ; encoding: [0x02,0x7e,0x80,0xa9]
@@ -80,7 +81,7 @@ s_add_nc_u64 s[0:1], s[2:3], 0x3f717273
 
 s_add_nc_u64 s[0:1], s[2:3], 0xaf123456
 // GFX1200: s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0xa9,0x56,0x34,0x12,0xaf]
-// GFX1250: s_add_nc_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_sub_nc_u64 s[0:1], s[2:3], s[4:5]
 // GFX12: s_sub_nc_u64 s[0:1], s[2:3], s[4:5]     ; encoding: [0x02,0x04,0x00,0xaa]
@@ -135,7 +136,7 @@ s_sub_nc_u64 s[0:1], 0x3f717273, s[2:3]
 
 s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3]
 // GFX1200: s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf]
-// GFX1250: s_sub_nc_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_sub_nc_u64 s[0:1], s[2:3], exec
 // GFX12: s_sub_nc_u64 s[0:1], s[2:3], exec       ; encoding: [0x02,0x7e,0x00,0xaa]
@@ -160,7 +161,7 @@ s_sub_nc_u64 s[0:1], s[2:3], 0x3f717273
 
 s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456
 // GFX1200: s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x00,0xaa,0x56,0x34,0x12,0xaf]
-// GFX1250: s_sub_nc_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_mul_u64 s[0:1], s[2:3], s[4:5]
 // GFX12: s_mul_u64 s[0:1], s[2:3], s[4:5]        ; encoding: [0x02,0x04,0x80,0xaa]
@@ -215,7 +216,7 @@ s_mul_u64 s[0:1], 0x3f717273, s[2:3]
 
 s_mul_u64 s[0:1], 0xaf123456, s[2:3]
 // GFX1200: s_mul_u64 s[0:1], 0xaf123456, s[2:3]    ; encoding: [0xff,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf]
-// GFX1250: s_mul_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_mul_u64 s[0:1], 0xaf123456, s[2:3]    ; encoding: [0xfe,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_mul_u64 s[0:1], s[2:3], exec
 // GFX12: s_mul_u64 s[0:1], s[2:3], exec          ; encoding: [0x02,0x7e,0x80,0xaa]
@@ -240,7 +241,7 @@ s_mul_u64 s[0:1], s[2:3], 0x3f717273
 
 s_mul_u64 s[0:1], s[2:3], 0xaf123456
 // GFX1200: s_mul_u64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xff,0x80,0xaa,0x56,0x34,0x12,0xaf]
-// GFX1250: s_mul_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_mul_u64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xfe,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_add_f32 s5, s1, s2
 // GFX12: s_add_f32 s5, s1, s2                    ; encoding: [0x01,0x02,0x05,0xa0]
@@ -2358,7 +2359,7 @@ s_cselect_b64 s[0:1], 0x3f717273, s[4:5]
 
 s_cselect_b64 s[0:1], 0xaf123456, s[4:5]
 // GFX1200: s_cselect_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x98,0x56,0x34,0x12,0xaf]
-// GFX1250: s_cselect_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_cselect_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_cselect_b64 s[0:1], s[2:3], exec
 // GFX12: s_cselect_b64 s[0:1], s[2:3], exec      ; encoding: [0x02,0x7e,0x80,0x98]
@@ -2383,7 +2384,7 @@ s_cselect_b64 s[0:1], s[2:3], 0x3f717273
 
 s_cselect_b64 s[0:1], s[2:3], 0xaf123456
 // GFX1200: s_cselect_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x98,0x56,0x34,0x12,0xaf]
-// GFX1250: s_cselect_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_cselect_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_and_b32 s0, s1, s2
 // GFX12: s_and_b32 s0, s1, s2                    ; encoding: [0x01,0x02,0x00,0x8b]
@@ -2552,7 +2553,7 @@ s_and_b64 s[0:1], 0x3f717273, s[4:5]
 
 s_and_b64 s[0:1], 0xaf123456, s[4:5]
 // GFX1200: s_and_b64 s[0:1], 0xaf123456, s[4:5]    ; encoding: [0xff,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_b64 s[0:1], 0xaf123456, s[4:5]    ; encoding: [0xfe,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_and_b64 s[0:1], s[2:3], exec
 // GFX12: s_and_b64 s[0:1], s[2:3], exec          ; encoding: [0x02,0x7e,0x80,0x8b]
@@ -2577,7 +2578,7 @@ s_and_b64 s[0:1], s[2:3], 0x3f717273
 
 s_and_b64 s[0:1], s[2:3], 0xaf123456
 // GFX1200: s_and_b64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xff,0x80,0x8b,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_b64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xfe,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_or_b32 s0, s1, s2
 // GFX12: s_or_b32 s0, s1, s2                     ; encoding: [0x01,0x02,0x00,0x8c]
@@ -2737,7 +2738,7 @@ s_or_b64 s[0:1], 0x3f717273, s[4:5]
 
 s_or_b64 s[0:1], 0xaf123456, s[4:5]
 // GFX1200: s_or_b64 s[0:1], 0xaf123456, s[4:5]     ; encoding: [0xff,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_b64 s[0:1], 0xaf123456, s[4:5]     ; encoding: [0xfe,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_or_b64 s[0:1], s[2:3], exec
 // GFX12: s_or_b64 s[0:1], s[2:3], exec           ; encoding: [0x02,0x7e,0x80,0x8c]
@@ -2762,7 +2763,7 @@ s_or_b64 s[0:1], s[2:3], 0x3f717273
 
 s_or_b64 s[0:1], s[2:3], 0xaf123456
 // GFX1200: s_or_b64 s[0:1], s[2:3], 0xaf123456     ; encoding: [0x02,0xff,0x80,0x8c,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_b64 s[0:1], s[2:3], 0xaf123456     ; encoding: [0x02,0xfe,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_xor_b32 s0, s1, s2
 // GFX12: s_xor_b32 s0, s1, s2                    ; encoding: [0x01,0x02,0x00,0x8d]
@@ -2922,7 +2923,7 @@ s_xor_b64 s[0:1], 0x3f717273, s[4:5]
 
 s_xor_b64 s[0:1], 0xaf123456, s[4:5]
 // GFX1200: s_xor_b64 s[0:1], 0xaf123456, s[4:5]    ; encoding: [0xff,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf]
-// GFX1250: s_xor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_xor_b64 s[0:1], 0xaf123456, s[4:5]    ; encoding: [0xfe,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_xor_b64 s[0:1], s[2:3], exec
 // GFX12: s_xor_b64 s[0:1], s[2:3], exec          ; encoding: [0x02,0x7e,0x80,0x8d]
@@ -2947,7 +2948,7 @@ s_xor_b64 s[0:1], s[2:3], 0x3f717273
 
 s_xor_b64 s[0:1], s[2:3], 0xaf123456
 // GFX1200: s_xor_b64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xff,0x80,0x8d,0x56,0x34,0x12,0xaf]
-// GFX1250: s_xor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_xor_b64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xfe,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_andn2_b32 s0, s1, s2
 // GFX12: s_and_not1_b32 s0, s1, s2               ; encoding: [0x01,0x02,0x00,0x91]
@@ -3107,7 +3108,7 @@ s_andn2_b64 s[0:1], 0x3f717273, s[4:5]
 
 s_andn2_b64 s[0:1], 0xaf123456, s[4:5]
 // GFX1200: s_and_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x91,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not1_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_andn2_b64 s[0:1], s[2:3], exec
 // GFX12: s_and_not1_b64 s[0:1], s[2:3], exec     ; encoding: [0x02,0x7e,0x80,0x91]
@@ -3132,7 +3133,7 @@ s_andn2_b64 s[0:1], s[2:3], 0x3f717273
 
 s_andn2_b64 s[0:1], s[2:3], 0xaf123456
 // GFX1200: s_and_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x91,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not1_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_orn2_b32 s0, s1, s2
 // GFX12: s_or_not1_b32 s0, s1, s2                ; encoding: [0x01,0x02,0x00,0x92]
@@ -3292,7 +3293,7 @@ s_orn2_b64 s[0:1], 0x3f717273, s[4:5]
 
 s_orn2_b64 s[0:1], 0xaf123456, s[4:5]
 // GFX1200: s_or_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x92,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_not1_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_orn2_b64 s[0:1], s[2:3], exec
 // GFX12: s_or_not1_b64 s[0:1], s[2:3], exec      ; encoding: [0x02,0x7e,0x80,0x92]
@@ -3317,7 +3318,7 @@ s_orn2_b64 s[0:1], s[2:3], 0x3f717273
 
 s_orn2_b64 s[0:1], s[2:3], 0xaf123456
 // GFX1200: s_or_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x92,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_not1_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_nand_b32 s0, s1, s2
 // GFX12: s_nand_b32 s0, s1, s2                   ; encoding: [0x01,0x02,0x00,0x8e]
@@ -3477,7 +3478,7 @@ s_nand_b64 s[0:1], 0x3f717273, s[4:5]
 
 s_nand_b64 s[0:1], 0xaf123456, s[4:5]
 // GFX1200: s_nand_b64 s[0:1], 0xaf123456, s[4:5]   ; encoding: [0xff,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf]
-// GFX1250: s_nand_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_nand_b64 s[0:1], 0xaf123456, s[4:5]   ; encoding: [0xfe,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_nand_b64 s[0:1], s[2:3], exec
 // GFX12: s_nand_b64 s[0:1], s[2:3], exec         ; encoding: [0x02,0x7e,0x80,0x8e]
@@ -3502,7 +3503,7 @@ s_nand_b64 s[0:1], s[2:3], 0x3f717273
 
 s_nand_b64 s[0:1], s[2:3], 0xaf123456
 // GFX1200: s_nand_b64 s[0:1], s[2:3], 0xaf123456   ; encoding: [0x02,0xff,0x80,0x8e,0x56,0x34,0x12,0xaf]
-// GFX1250: s_nand_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_nand_b64 s[0:1], s[2:3], 0xaf123456   ; encoding: [0x02,0xfe,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_nor_b32 s0, s1, s2
 // GFX12: s_nor_b32 s0, s1, s2                    ; encoding: [0x01,0x02,0x00,0x8f]
@@ -3662,7 +3663,7 @@ s_nor_b64 s[0:1], 0x3f717273, s[4:5]
 
 s_nor_b64 s[0:1], 0xaf123456, s[4:5]
 // GFX1200: s_nor_b64 s[0:1], 0xaf123456, s[4:5]    ; encoding: [0xff,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf]
-// GFX1250: s_nor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_nor_b64 s[0:1], 0xaf123456, s[4:5]    ; encoding: [0xfe,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_nor_b64 s[0:1], s[2:3], exec
 // GFX12: s_nor_b64 s[0:1], s[2:3], exec          ; encoding: [0x02,0x7e,0x80,0x8f]
@@ -3687,7 +3688,7 @@ s_nor_b64 s[0:1], s[2:3], 0x3f717273
 
 s_nor_b64 s[0:1], s[2:3], 0xaf123456
 // GFX1200: s_nor_b64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xff,0x80,0x8f,0x56,0x34,0x12,0xaf]
-// GFX1250: s_nor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_nor_b64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xfe,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_xnor_b32 s0, s1, s2
 // GFX12: s_xnor_b32 s0, s1, s2                   ; encoding: [0x01,0x02,0x00,0x90]
@@ -3847,7 +3848,7 @@ s_xnor_b64 s[0:1], 0x3f717273, s[4:5]
 
 s_xnor_b64 s[0:1], 0xaf123456, s[4:5]
 // GFX1200: s_xnor_b64 s[0:1], 0xaf123456, s[4:5]   ; encoding: [0xff,0x04,0x80,0x90,0x56,0x34,0x12,0xaf]
-// GFX1250: s_xnor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_xnor_b64 s[0:1], 0xaf123456, s[4:5]   ; encoding: [0xfe,0x04,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_xnor_b64 s[0:1], s[2:3], exec
 // GFX12: s_xnor_b64 s[0:1], s[2:3], exec         ; encoding: [0x02,0x7e,0x80,0x90]
@@ -3872,7 +3873,7 @@ s_xnor_b64 s[0:1], s[2:3], 0x3f717273
 
 s_xnor_b64 s[0:1], s[2:3], 0xaf123456
 // GFX1200: s_xnor_b64 s[0:1], s[2:3], 0xaf123456   ; encoding: [0x02,0xff,0x80,0x90,0x56,0x34,0x12,0xaf]
-// GFX1250: s_xnor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_xnor_b64 s[0:1], s[2:3], 0xaf123456   ; encoding: [0x02,0xfe,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_lshl_b32 s0, s1, s2
 // GFX12: s_lshl_b32 s0, s1, s2                   ; encoding: [0x01,0x02,0x00,0x84]
@@ -4032,7 +4033,7 @@ s_lshl_b64 s[0:1], 0x3f717273, s4
 
 s_lshl_b64 s[0:1], 0xaf123456, s4
 // GFX1200: s_lshl_b64 s[0:1], 0xaf123456, s4       ; encoding: [0xff,0x04,0x80,0x84,0x56,0x34,0x12,0xaf]
-// GFX1250: s_lshl_b64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x84,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_lshl_b64 s[0:1], 0xaf123456, s4       ; encoding: [0xfe,0x04,0x80,0x84,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_lshl_b64 s[0:1], s[2:3], exec_lo
 // GFX12: s_lshl_b64 s[0:1], s[2:3], exec_lo      ; encoding: [0x02,0x7e,0x80,0x84]
@@ -4216,7 +4217,7 @@ s_lshr_b64 s[0:1], 0x3f717273, s4
 
 s_lshr_b64 s[0:1], 0xaf123456, s4
 // GFX1200: s_lshr_b64 s[0:1], 0xaf123456, s4       ; encoding: [0xff,0x04,0x80,0x85,0x56,0x34,0x12,0xaf]
-// GFX1250: s_lshr_b64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x85,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_lshr_b64 s[0:1], 0xaf123456, s4       ; encoding: [0xfe,0x04,0x80,0x85,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_lshr_b64 s[0:1], s[2:3], exec_lo
 // GFX12: s_lshr_b64 s[0:1], s[2:3], exec_lo      ; encoding: [0x02,0x7e,0x80,0x85]
@@ -4400,7 +4401,7 @@ s_ashr_i64 s[0:1], 0x3f717273, s4
 
 s_ashr_i64 s[0:1], 0xaf123456, s4
 // GFX1200: s_ashr_i64 s[0:1], 0xaf123456, s4       ; encoding: [0xff,0x04,0x80,0x86,0x56,0x34,0x12,0xaf]
-// GFX1250: s_ashr_i64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x86,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_ashr_i64 s[0:1], 0xaf123456, s4       ; encoding: [0xfe,0x04,0x80,0x86,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_ashr_i64 s[0:1], s[2:3], exec_lo
 // GFX12: s_ashr_i64 s[0:1], s[2:3], exec_lo      ; encoding: [0x02,0x7e,0x80,0x86]
@@ -4995,7 +4996,7 @@ s_bfe_u64 s[0:1], 0x3f717273, s4
 
 s_bfe_u64 s[0:1], 0xaf123456, s4
 // GFX1200: s_bfe_u64 s[0:1], 0xaf123456, s4        ; encoding: [0xff,0x04,0x00,0x94,0x56,0x34,0x12,0xaf]
-// GFX1250: s_bfe_u64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x00,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_bfe_u64 s[0:1], 0xaf123456, s4        ; encoding: [0xfe,0x04,0x00,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_bfe_u64 s[0:1], s[2:3], exec_lo
 // GFX12: s_bfe_u64 s[0:1], s[2:3], exec_lo       ; encoding: [0x02,0x7e,0x00,0x94]
@@ -5074,7 +5075,7 @@ s_bfe_i64 s[0:1], 0x3f717273, s4
 
 s_bfe_i64 s[0:1], 0xaf123456, s4
 // GFX1200: s_bfe_i64 s[0:1], 0xaf123456, s4        ; encoding: [0xff,0x04,0x80,0x94,0x56,0x34,0x12,0xaf]
-// GFX1250: s_bfe_i64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_bfe_i64 s[0:1], 0xaf123456, s4        ; encoding: [0xfe,0x04,0x80,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_bfe_i64 s[0:1], s[2:3], exec_lo
 // GFX12: s_bfe_i64 s[0:1], s[2:3], exec_lo       ; encoding: [0x02,0x7e,0x80,0x94]
@@ -6278,7 +6279,7 @@ s_and_not1_b64 s[10:11], vcc, ttmp[14:15]
 
 s_and_not1_b64 s[10:11], ttmp[14:15], 0xaf123456
 // GFX1200: s_and_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xff,0x8a,0x91,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not1_b64 s[10:11], ttmp[14:15], lit64(0xaf123456) ; encoding: [0x7a,0xfe,0x8a,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xfe,0x8a,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_and_not1_b64 s[10:11], exec, src_scc
 // GFX12: s_and_not1_b64 s[10:11], exec, src_scc  ; encoding: [0x7e,0xfd,0x8a,0x91]
@@ -6297,7 +6298,7 @@ s_and_not1_b64 exec, src_scc, exec
 
 s_and_not1_b64 null, 0xaf123456, vcc
 // GFX1200: s_and_not1_b64 null, 0xaf123456, vcc    ; encoding: [0xff,0x6a,0xfc,0x91,0x56,0x34,0x12,0xaf]
-// GFX1250: s_and_not1_b64 null, lit64(0xaf123456), vcc ; encoding: [0xfe,0x6a,0xfc,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_and_not1_b64 null, 0xaf123456, vcc    ; encoding: [0xfe,0x6a,0xfc,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_or_not1_b64 s[10:11], s[2:3], s[4:5]
 // GFX12: s_or_not1_b64 s[10:11], s[2:3], s[4:5]  ; encoding: [0x02,0x04,0x8a,0x92]
@@ -6310,7 +6311,7 @@ s_or_not1_b64 s[10:11], vcc, ttmp[14:15]
 
 s_or_not1_b64 s[10:11], ttmp[14:15], 0xaf123456
 // GFX1200: s_or_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xff,0x8a,0x92,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_not1_b64 s[10:11], ttmp[14:15], lit64(0xaf123456) ; encoding: [0x7a,0xfe,0x8a,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xfe,0x8a,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_or_not1_b64 s[10:11], exec, src_scc
 // GFX12: s_or_not1_b64 s[10:11], exec, src_scc   ; encoding: [0x7e,0xfd,0x8a,0x92]
@@ -6329,4 +6330,4 @@ s_or_not1_b64 exec, src_scc, exec
 
 s_or_not1_b64 null, 0xaf123456, vcc
 // GFX1200: s_or_not1_b64 null, 0xaf123456, vcc     ; encoding: [0xff,0x6a,0xfc,0x92,0x56,0x34,0x12,0xaf]
-// GFX1250: s_or_not1_b64 null, lit64(0xaf123456), vcc ; encoding: [0xfe,0x6a,0xfc,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_or_not1_b64 null, 0xaf123456, vcc     ; encoding: [0xfe,0x6a,0xfc,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sopc.s b/llvm/test/MC/AMDGPU/gfx12_asm_sopc.s
index cedba66d..98bb3c3 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sopc.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sopc.s
@@ -1,6 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck --check-prefixes=GFX12,GFX1200 %s
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX12,GFX1250 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s
 
 s_cmp_lt_f32 s1, s2
 // GFX12: s_cmp_lt_f32 s1, s2                     ; encoding: [0x01,0x02,0x41,0xbf]
@@ -2119,7 +2120,7 @@ s_cmp_eq_u64 s[0:1], 0x3f717273
 
 s_cmp_eq_u64 s[0:1], 0xaf123456
 // GFX1200: s_cmp_eq_u64 s[0:1], 0xaf123456         ; encoding: [0x00,0xff,0x10,0xbf,0x56,0x34,0x12,0xaf]
-// GFX1250: s_cmp_eq_u64 s[0:1], lit64(0xaf123456)  ; encoding: [0x00,0xfe,0x10,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_cmp_eq_u64 s[0:1], 0xaf123456         ; encoding: [0x00,0xfe,0x10,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 s_cmp_lg_u64 s[0:1], s[2:3]
 // GFX12: s_cmp_lg_u64 s[0:1], s[2:3]             ; encoding: [0x00,0x02,0x11,0xbf]
@@ -2162,4 +2163,4 @@ s_cmp_lg_u64 s[0:1], 0x3f717273
 
 s_cmp_lg_u64 s[0:1], 0xaf123456
 // GFX1200: s_cmp_lg_u64 s[0:1], 0xaf123456         ; encoding: [0x00,0xff,0x11,0xbf,0x56,0x34,0x12,0xaf]
-// GFX1250: s_cmp_lg_u64 s[0:1], lit64(0xaf123456)  ; encoding: [0x00,0xfe,0x11,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+// GFX1250: s_cmp_lg_u64 s[0:1], 0xaf123456         ; encoding: [0x00,0xfe,0x11,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s
index 43673d1..c96a72d 100644
--- a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s
+++ b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s
@@ -2,707 +2,707 @@
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx90a -show-encoding %s | FileCheck --check-prefix=GFX90A %s
 
 // GFX90A: flat_load_ubyte a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x40,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte a5, v[2:3] offset:4095
 
 // GFX90A: flat_load_ubyte a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x40,0xdc,0x02,0x00,0x80,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte a255, v[2:3] offset:4095
 
 // GFX90A: flat_load_ubyte a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x40,0xdc,0xfe,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte a5, v[254:255] offset:4095
 
 // GFX90A: flat_load_ubyte a5, v[2:3]      ; encoding: [0x00,0x00,0x40,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte a5, v[2:3]
 
 // GFX90A: flat_load_ubyte a5, v[2:3]      ; encoding: [0x00,0x00,0x40,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte a5, v[2:3]
 
 // GFX90A: flat_load_ubyte a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x40,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte a5, v[2:3] offset:7
 
 // GFX90A: flat_load_ubyte a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x41,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte a5, v[2:3] offset:4095 glc
 
 // GFX90A: flat_load_ubyte a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x42,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte a5, v[2:3] offset:4095 slc
 
 // GFX90A: flat_load_sbyte a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x44,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte a5, v[2:3] offset:4095
 
 // GFX90A: flat_load_sbyte a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x44,0xdc,0x02,0x00,0x80,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte a255, v[2:3] offset:4095
 
 // GFX90A: flat_load_sbyte a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x44,0xdc,0xfe,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte a5, v[254:255] offset:4095
 
 // GFX90A: flat_load_sbyte a5, v[2:3]      ; encoding: [0x00,0x00,0x44,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte a5, v[2:3]
 
 // GFX90A: flat_load_sbyte a5, v[2:3]      ; encoding: [0x00,0x00,0x44,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte a5, v[2:3]
 
 // GFX90A: flat_load_sbyte a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x44,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte a5, v[2:3] offset:7
 
 // GFX90A: flat_load_sbyte a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x45,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte a5, v[2:3] offset:4095 glc
 
 // GFX90A: flat_load_sbyte a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x46,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte a5, v[2:3] offset:4095 slc
 
 // GFX90A: flat_load_ushort a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x48,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ushort a5, v[2:3] offset:4095
 
 // GFX90A: flat_load_ushort a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x48,0xdc,0x02,0x00,0x80,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ushort a255, v[2:3] offset:4095
 
 // GFX90A: flat_load_ushort a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x48,0xdc,0xfe,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ushort a5, v[254:255] offset:4095
 
 // GFX90A: flat_load_ushort a5, v[2:3]     ; encoding: [0x00,0x00,0x48,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ushort a5, v[2:3]
 
 // GFX90A: flat_load_ushort a5, v[2:3]     ; encoding: [0x00,0x00,0x48,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ushort a5, v[2:3]
 
 // GFX90A: flat_load_ushort a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x48,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ushort a5, v[2:3] offset:7
 
 // GFX90A: flat_load_ushort a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x49,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ushort a5, v[2:3] offset:4095 glc
 
 // GFX90A: flat_load_ushort a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x4a,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ushort a5, v[2:3] offset:4095 slc
 
 // GFX90A: flat_load_sshort a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x4c,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sshort a5, v[2:3] offset:4095
 
 // GFX90A: flat_load_sshort a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x4c,0xdc,0x02,0x00,0x80,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sshort a255, v[2:3] offset:4095
 
 // GFX90A: flat_load_sshort a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x4c,0xdc,0xfe,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sshort a5, v[254:255] offset:4095
 
 // GFX90A: flat_load_sshort a5, v[2:3]     ; encoding: [0x00,0x00,0x4c,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sshort a5, v[2:3]
 
 // GFX90A: flat_load_sshort a5, v[2:3]     ; encoding: [0x00,0x00,0x4c,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sshort a5, v[2:3]
 
 // GFX90A: flat_load_sshort a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x4c,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sshort a5, v[2:3] offset:7
 
 // GFX90A: flat_load_sshort a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x4d,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sshort a5, v[2:3] offset:4095 glc
 
 // GFX90A: flat_load_sshort a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x4e,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sshort a5, v[2:3] offset:4095 slc
 
 // GFX90A: flat_load_dword a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x50,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dword a5, v[2:3] offset:4095
 
 // GFX90A: flat_load_dword a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x50,0xdc,0x02,0x00,0x80,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dword a255, v[2:3] offset:4095
 
 // GFX90A: flat_load_dword a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x50,0xdc,0xfe,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dword a5, v[254:255] offset:4095
 
 // GFX90A: flat_load_dword a5, v[2:3]      ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dword a5, v[2:3]
 
 // GFX90A: flat_load_dword a5, v[2:3]      ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dword a5, v[2:3]
 
 // GFX90A: flat_load_dword a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x50,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dword a5, v[2:3] offset:7
 
 // GFX90A: flat_load_dword a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x51,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dword a5, v[2:3] offset:4095 glc
 
 // GFX90A: flat_load_dword a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x52,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dword a5, v[2:3] offset:4095 slc
 
 // GFX90A: flat_load_dwordx2 a[6:7], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x54,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx2 a[6:7], v[2:3] offset:4095
 
 // GFX90A: flat_load_dwordx2 a[254:255], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x54,0xdc,0x02,0x00,0x80,0xfe]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx2 a[254:255], v[2:3] offset:4095
 
 // GFX90A: flat_load_dwordx2 a[6:7], v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x54,0xdc,0xfe,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx2 a[6:7], v[254:255] offset:4095
 
 // GFX90A: flat_load_dwordx2 a[6:7], v[2:3] ; encoding: [0x00,0x00,0x54,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx2 a[6:7], v[2:3]
 
 // GFX90A: flat_load_dwordx2 a[6:7], v[2:3] ; encoding: [0x00,0x00,0x54,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx2 a[6:7], v[2:3]
 
 // GFX90A: flat_load_dwordx2 a[6:7], v[2:3] offset:7 ; encoding: [0x07,0x00,0x54,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx2 a[6:7], v[2:3] offset:7
 
 // GFX90A: flat_load_dwordx2 a[6:7], v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x55,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx2 a[6:7], v[2:3] offset:4095 glc
 
 // GFX90A: flat_load_dwordx2 a[6:7], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x56,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx2 a[6:7], v[2:3] offset:4095 slc
 
 // GFX90A: flat_load_dwordx3 a[6:8], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x58,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx3 a[6:8], v[2:3] offset:4095
 
 // GFX90A: flat_load_dwordx3 a[252:254], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x58,0xdc,0x02,0x00,0x80,0xfc]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx3 a[252:254], v[2:3] offset:4095
 
 // GFX90A: flat_load_dwordx3 a[6:8], v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x58,0xdc,0xfe,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx3 a[6:8], v[254:255] offset:4095
 
 // GFX90A: flat_load_dwordx3 a[6:8], v[2:3] ; encoding: [0x00,0x00,0x58,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx3 a[6:8], v[2:3]
 
 // GFX90A: flat_load_dwordx3 a[6:8], v[2:3] ; encoding: [0x00,0x00,0x58,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx3 a[6:8], v[2:3]
 
 // GFX90A: flat_load_dwordx3 a[6:8], v[2:3] offset:7 ; encoding: [0x07,0x00,0x58,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx3 a[6:8], v[2:3] offset:7
 
 // GFX90A: flat_load_dwordx3 a[6:8], v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x59,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx3 a[6:8], v[2:3] offset:4095 glc
 
 // GFX90A: flat_load_dwordx3 a[6:8], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x5a,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx3 a[6:8], v[2:3] offset:4095 slc
 
 // GFX90A: flat_load_dwordx4 a[6:9], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x5c,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx4 a[6:9], v[2:3] offset:4095
 
 // GFX90A: flat_load_dwordx4 a[252:255], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x5c,0xdc,0x02,0x00,0x80,0xfc]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx4 a[252:255], v[2:3] offset:4095
 
 // GFX90A: flat_load_dwordx4 a[6:9], v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x5c,0xdc,0xfe,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx4 a[6:9], v[254:255] offset:4095
 
 // GFX90A: flat_load_dwordx4 a[6:9], v[2:3] ; encoding: [0x00,0x00,0x5c,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx4 a[6:9], v[2:3]
 
 // GFX90A: flat_load_dwordx4 a[6:9], v[2:3] ; encoding: [0x00,0x00,0x5c,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx4 a[6:9], v[2:3]
 
 // GFX90A: flat_load_dwordx4 a[6:9], v[2:3] offset:7 ; encoding: [0x07,0x00,0x5c,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx4 a[6:9], v[2:3] offset:7
 
 // GFX90A: flat_load_dwordx4 a[6:9], v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x5d,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx4 a[6:9], v[2:3] offset:4095 glc
 
 // GFX90A: flat_load_dwordx4 a[6:9], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x5e,0xdc,0x02,0x00,0x80,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_dwordx4 a[6:9], v[2:3] offset:4095 slc
 
 // GFX90A: flat_store_byte v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x60,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_byte v[2:3], a2 offset:4095
 
 // GFX90A: flat_store_byte v[254:255], a2 offset:4095 ; encoding: [0xff,0x0f,0x60,0xdc,0xfe,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_byte v[254:255], a2 offset:4095
 
 // GFX90A: flat_store_byte v[2:3], a255 offset:4095 ; encoding: [0xff,0x0f,0x60,0xdc,0x02,0xff,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_byte v[2:3], a255 offset:4095
 
 // GFX90A: flat_store_byte v[2:3], a2      ; encoding: [0x00,0x00,0x60,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_byte v[2:3], a2
 
 // GFX90A: flat_store_byte v[2:3], a2      ; encoding: [0x00,0x00,0x60,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_byte v[2:3], a2
 
 // GFX90A: flat_store_byte v[2:3], a2 offset:7 ; encoding: [0x07,0x00,0x60,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_byte v[2:3], a2 offset:7
 
 // GFX90A: flat_store_byte v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x61,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_byte v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_store_byte v[2:3], a2 offset:4095 slc ; encoding: [0xff,0x0f,0x62,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_byte v[2:3], a2 offset:4095 slc
 
 // GFX90A: flat_store_byte_d16_hi v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x64,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_byte_d16_hi v[2:3], a2 offset:4095
 
 // GFX90A: flat_store_byte_d16_hi v[254:255], a2 offset:4095 ; encoding: [0xff,0x0f,0x64,0xdc,0xfe,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_byte_d16_hi v[254:255], a2 offset:4095
 
 // GFX90A: flat_store_byte_d16_hi v[2:3], a255 offset:4095 ; encoding: [0xff,0x0f,0x64,0xdc,0x02,0xff,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_byte_d16_hi v[2:3], a255 offset:4095
 
 // GFX90A: flat_store_byte_d16_hi v[2:3], a2 ; encoding: [0x00,0x00,0x64,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_byte_d16_hi v[2:3], a2
 
 // GFX90A: flat_store_byte_d16_hi v[2:3], a2 ; encoding: [0x00,0x00,0x64,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_byte_d16_hi v[2:3], a2
 
 // GFX90A: flat_store_byte_d16_hi v[2:3], a2 offset:7 ; encoding: [0x07,0x00,0x64,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_byte_d16_hi v[2:3], a2 offset:7
 
 // GFX90A: flat_store_byte_d16_hi v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x65,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_byte_d16_hi v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_store_byte_d16_hi v[2:3], a2 offset:4095 slc ; encoding: [0xff,0x0f,0x66,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_byte_d16_hi v[2:3], a2 offset:4095 slc
 
 // GFX90A: flat_store_short v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x68,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_short v[2:3], a2 offset:4095
 
 // GFX90A: flat_store_short v[254:255], a2 offset:4095 ; encoding: [0xff,0x0f,0x68,0xdc,0xfe,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_short v[254:255], a2 offset:4095
 
 // GFX90A: flat_store_short v[2:3], a255 offset:4095 ; encoding: [0xff,0x0f,0x68,0xdc,0x02,0xff,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_short v[2:3], a255 offset:4095
 
 // GFX90A: flat_store_short v[2:3], a2     ; encoding: [0x00,0x00,0x68,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_short v[2:3], a2
 
 // GFX90A: flat_store_short v[2:3], a2     ; encoding: [0x00,0x00,0x68,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_short v[2:3], a2
 
 // GFX90A: flat_store_short v[2:3], a2 offset:7 ; encoding: [0x07,0x00,0x68,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_short v[2:3], a2 offset:7
 
 // GFX90A: flat_store_short v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x69,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_short v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_store_short v[2:3], a2 offset:4095 slc ; encoding: [0xff,0x0f,0x6a,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_short v[2:3], a2 offset:4095 slc
 
 // GFX90A: flat_store_short_d16_hi v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_short_d16_hi v[2:3], a2 offset:4095
 
 // GFX90A: flat_store_short_d16_hi v[254:255], a2 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xdc,0xfe,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_short_d16_hi v[254:255], a2 offset:4095
 
 // GFX90A: flat_store_short_d16_hi v[2:3], a255 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xdc,0x02,0xff,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_short_d16_hi v[2:3], a255 offset:4095
 
 // GFX90A: flat_store_short_d16_hi v[2:3], a2 ; encoding: [0x00,0x00,0x6c,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_short_d16_hi v[2:3], a2
 
 // GFX90A: flat_store_short_d16_hi v[2:3], a2 ; encoding: [0x00,0x00,0x6c,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_short_d16_hi v[2:3], a2
 
 // GFX90A: flat_store_short_d16_hi v[2:3], a2 offset:7 ; encoding: [0x07,0x00,0x6c,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_short_d16_hi v[2:3], a2 offset:7
 
 // GFX90A: flat_store_short_d16_hi v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x6d,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_short_d16_hi v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_store_short_d16_hi v[2:3], a2 offset:4095 slc ; encoding: [0xff,0x0f,0x6e,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_short_d16_hi v[2:3], a2 offset:4095 slc
 
 // GFX90A: flat_store_dword v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x70,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dword v[2:3], a2 offset:4095
 
 // GFX90A: flat_store_dword v[254:255], a2 offset:4095 ; encoding: [0xff,0x0f,0x70,0xdc,0xfe,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dword v[254:255], a2 offset:4095
 
 // GFX90A: flat_store_dword v[2:3], a255 offset:4095 ; encoding: [0xff,0x0f,0x70,0xdc,0x02,0xff,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dword v[2:3], a255 offset:4095
 
 // GFX90A: flat_store_dword v[2:3], a2     ; encoding: [0x00,0x00,0x70,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dword v[2:3], a2
 
 // GFX90A: flat_store_dword v[2:3], a2     ; encoding: [0x00,0x00,0x70,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dword v[2:3], a2
 
 // GFX90A: flat_store_dword v[2:3], a2 offset:7 ; encoding: [0x07,0x00,0x70,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dword v[2:3], a2 offset:7
 
 // GFX90A: flat_store_dword v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x71,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dword v[2:3], a2 offset:4095 glc
 
 // GFX90A: flat_store_dword v[2:3], a2 offset:4095 slc ; encoding: [0xff,0x0f,0x72,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dword v[2:3], a2 offset:4095 slc
 
 // GFX90A: flat_store_dwordx2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x74,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx2 v[2:3], a[2:3] offset:4095
 
 // GFX90A: flat_store_dwordx2 v[254:255], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x74,0xdc,0xfe,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx2 v[254:255], a[2:3] offset:4095
 
 // GFX90A: flat_store_dwordx2 v[2:3], a[254:255] offset:4095 ; encoding: [0xff,0x0f,0x74,0xdc,0x02,0xfe,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx2 v[2:3], a[254:255] offset:4095
 
 // GFX90A: flat_store_dwordx2 v[2:3], a[2:3] ; encoding: [0x00,0x00,0x74,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx2 v[2:3], a[2:3]
 
 // GFX90A: flat_store_dwordx2 v[2:3], a[2:3] ; encoding: [0x00,0x00,0x74,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx2 v[2:3], a[2:3]
 
 // GFX90A: flat_store_dwordx2 v[2:3], a[2:3] offset:7 ; encoding: [0x07,0x00,0x74,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx2 v[2:3], a[2:3] offset:7
 
 // GFX90A: flat_store_dwordx2 v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x75,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx2 v[2:3], a[2:3] offset:4095 glc
 
 // GFX90A: flat_store_dwordx2 v[2:3], a[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x76,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx2 v[2:3], a[2:3] offset:4095 slc
 
 // GFX90A: flat_store_dwordx3 v[2:3], a[2:4] offset:4095 ; encoding: [0xff,0x0f,0x78,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx3 v[2:3], a[2:4] offset:4095
 
 // GFX90A: flat_store_dwordx3 v[254:255], a[2:4] offset:4095 ; encoding: [0xff,0x0f,0x78,0xdc,0xfe,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx3 v[254:255], a[2:4] offset:4095
 
 // GFX90A: flat_store_dwordx3 v[2:3], a[252:254] offset:4095 ; encoding: [0xff,0x0f,0x78,0xdc,0x02,0xfc,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx3 v[2:3], a[252:254] offset:4095
 
 // GFX90A: flat_store_dwordx3 v[2:3], a[2:4] ; encoding: [0x00,0x00,0x78,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx3 v[2:3], a[2:4]
 
 // GFX90A: flat_store_dwordx3 v[2:3], a[2:4] ; encoding: [0x00,0x00,0x78,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx3 v[2:3], a[2:4]
 
 // GFX90A: flat_store_dwordx3 v[2:3], a[2:4] offset:7 ; encoding: [0x07,0x00,0x78,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx3 v[2:3], a[2:4] offset:7
 
 // GFX90A: flat_store_dwordx3 v[2:3], a[2:4] offset:4095 glc ; encoding: [0xff,0x0f,0x79,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx3 v[2:3], a[2:4] offset:4095 glc
 
 // GFX90A: flat_store_dwordx3 v[2:3], a[2:4] offset:4095 slc ; encoding: [0xff,0x0f,0x7a,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx3 v[2:3], a[2:4] offset:4095 slc
 
 // GFX90A: flat_store_dwordx4 v[2:3], a[2:5] offset:4095 ; encoding: [0xff,0x0f,0x7c,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx4 v[2:3], a[2:5] offset:4095
 
 // GFX90A: flat_store_dwordx4 v[254:255], a[2:5] offset:4095 ; encoding: [0xff,0x0f,0x7c,0xdc,0xfe,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx4 v[254:255], a[2:5] offset:4095
 
 // GFX90A: flat_store_dwordx4 v[2:3], a[252:255] offset:4095 ; encoding: [0xff,0x0f,0x7c,0xdc,0x02,0xfc,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx4 v[2:3], a[252:255] offset:4095
 
 // GFX90A: flat_store_dwordx4 v[2:3], a[2:5] ; encoding: [0x00,0x00,0x7c,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx4 v[2:3], a[2:5]
 
 // GFX90A: flat_store_dwordx4 v[2:3], a[2:5] ; encoding: [0x00,0x00,0x7c,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx4 v[2:3], a[2:5]
 
 // GFX90A: flat_store_dwordx4 v[2:3], a[2:5] offset:7 ; encoding: [0x07,0x00,0x7c,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx4 v[2:3], a[2:5] offset:7
 
 // GFX90A: flat_store_dwordx4 v[2:3], a[2:5] offset:4095 glc ; encoding: [0xff,0x0f,0x7d,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx4 v[2:3], a[2:5] offset:4095 glc
 
 // GFX90A: flat_store_dwordx4 v[2:3], a[2:5] offset:4095 slc ; encoding: [0xff,0x0f,0x7e,0xdc,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_store_dwordx4 v[2:3], a[2:5] offset:4095 slc
 
 // GFX90A: flat_load_ubyte_d16 a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x80,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte_d16 a5, v[2:3] offset:4095
 
 // GFX90A: flat_load_ubyte_d16 a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x80,0xdc,0x02,0x00,0x80,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte_d16 a255, v[2:3] offset:4095
 
 // GFX90A: flat_load_ubyte_d16 a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x80,0xdc,0xfe,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte_d16 a5, v[254:255] offset:4095
 
 // GFX90A: flat_load_ubyte_d16 a5, v[2:3]  ; encoding: [0x00,0x00,0x80,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte_d16 a5, v[2:3]
 
 // GFX90A: flat_load_ubyte_d16 a5, v[2:3]  ; encoding: [0x00,0x00,0x80,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte_d16 a5, v[2:3]
 
 // GFX90A: flat_load_ubyte_d16 a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x80,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte_d16 a5, v[2:3] offset:7
 
 // GFX90A: flat_load_ubyte_d16 a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x81,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte_d16 a5, v[2:3] offset:4095 glc
 
 // GFX90A: flat_load_ubyte_d16 a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x82,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte_d16 a5, v[2:3] offset:4095 slc
 
 // GFX90A: flat_load_ubyte_d16_hi a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x84,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte_d16_hi a5, v[2:3] offset:4095
 
 // GFX90A: flat_load_ubyte_d16_hi a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x84,0xdc,0x02,0x00,0x80,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte_d16_hi a255, v[2:3] offset:4095
 
 // GFX90A: flat_load_ubyte_d16_hi a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x84,0xdc,0xfe,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte_d16_hi a5, v[254:255] offset:4095
 
 // GFX90A: flat_load_ubyte_d16_hi a5, v[2:3] ; encoding: [0x00,0x00,0x84,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte_d16_hi a5, v[2:3]
 
 // GFX90A: flat_load_ubyte_d16_hi a5, v[2:3] ; encoding: [0x00,0x00,0x84,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte_d16_hi a5, v[2:3]
 
 // GFX90A: flat_load_ubyte_d16_hi a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x84,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte_d16_hi a5, v[2:3] offset:7
 
 // GFX90A: flat_load_ubyte_d16_hi a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x85,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte_d16_hi a5, v[2:3] offset:4095 glc
 
 // GFX90A: flat_load_ubyte_d16_hi a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x86,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_ubyte_d16_hi a5, v[2:3] offset:4095 slc
 
 // GFX90A: flat_load_sbyte_d16 a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x88,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte_d16 a5, v[2:3] offset:4095
 
 // GFX90A: flat_load_sbyte_d16 a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x88,0xdc,0x02,0x00,0x80,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte_d16 a255, v[2:3] offset:4095
 
 // GFX90A: flat_load_sbyte_d16 a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x88,0xdc,0xfe,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte_d16 a5, v[254:255] offset:4095
 
 // GFX90A: flat_load_sbyte_d16 a5, v[2:3]  ; encoding: [0x00,0x00,0x88,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte_d16 a5, v[2:3]
 
 // GFX90A: flat_load_sbyte_d16 a5, v[2:3]  ; encoding: [0x00,0x00,0x88,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte_d16 a5, v[2:3]
 
 // GFX90A: flat_load_sbyte_d16 a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x88,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte_d16 a5, v[2:3] offset:7
 
 // GFX90A: flat_load_sbyte_d16 a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x89,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte_d16 a5, v[2:3] offset:4095 glc
 
 // GFX90A: flat_load_sbyte_d16 a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x8a,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte_d16 a5, v[2:3] offset:4095 slc
 
 // GFX90A: flat_load_sbyte_d16_hi a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x8c,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte_d16_hi a5, v[2:3] offset:4095
 
 // GFX90A: flat_load_sbyte_d16_hi a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x8c,0xdc,0x02,0x00,0x80,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte_d16_hi a255, v[2:3] offset:4095
 
 // GFX90A: flat_load_sbyte_d16_hi a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x8c,0xdc,0xfe,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte_d16_hi a5, v[254:255] offset:4095
 
 // GFX90A: flat_load_sbyte_d16_hi a5, v[2:3] ; encoding: [0x00,0x00,0x8c,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte_d16_hi a5, v[2:3]
 
 // GFX90A: flat_load_sbyte_d16_hi a5, v[2:3] ; encoding: [0x00,0x00,0x8c,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte_d16_hi a5, v[2:3]
 
 // GFX90A: flat_load_sbyte_d16_hi a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x8c,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte_d16_hi a5, v[2:3] offset:7
 
 // GFX90A: flat_load_sbyte_d16_hi a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x8d,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte_d16_hi a5, v[2:3] offset:4095 glc
 
 // GFX90A: flat_load_sbyte_d16_hi a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x8e,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_sbyte_d16_hi a5, v[2:3] offset:4095 slc
 
 // GFX90A: flat_load_short_d16 a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x90,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_short_d16 a5, v[2:3] offset:4095
 
 // GFX90A: flat_load_short_d16 a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x90,0xdc,0x02,0x00,0x80,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_short_d16 a255, v[2:3] offset:4095
 
 // GFX90A: flat_load_short_d16 a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x90,0xdc,0xfe,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_short_d16 a5, v[254:255] offset:4095
 
 // GFX90A: flat_load_short_d16 a5, v[2:3]  ; encoding: [0x00,0x00,0x90,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_short_d16 a5, v[2:3]
 
 // GFX90A: flat_load_short_d16 a5, v[2:3]  ; encoding: [0x00,0x00,0x90,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_short_d16 a5, v[2:3]
 
 // GFX90A: flat_load_short_d16 a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x90,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_short_d16 a5, v[2:3] offset:7
 
 // GFX90A: flat_load_short_d16 a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x91,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_short_d16 a5, v[2:3] offset:4095 glc
 
 // GFX90A: flat_load_short_d16 a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x92,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_short_d16 a5, v[2:3] offset:4095 slc
 
 // GFX90A: flat_load_short_d16_hi a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x94,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_short_d16_hi a5, v[2:3] offset:4095
 
 // GFX90A: flat_load_short_d16_hi a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x94,0xdc,0x02,0x00,0x80,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_short_d16_hi a255, v[2:3] offset:4095
 
 // GFX90A: flat_load_short_d16_hi a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x94,0xdc,0xfe,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_short_d16_hi a5, v[254:255] offset:4095
 
 // GFX90A: flat_load_short_d16_hi a5, v[2:3] ; encoding: [0x00,0x00,0x94,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_short_d16_hi a5, v[2:3]
 
 // GFX90A: flat_load_short_d16_hi a5, v[2:3] ; encoding: [0x00,0x00,0x94,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_short_d16_hi a5, v[2:3]
 
 // GFX90A: flat_load_short_d16_hi a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x94,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_short_d16_hi a5, v[2:3] offset:7
 
 // GFX90A: flat_load_short_d16_hi a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x95,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_short_d16_hi a5, v[2:3] offset:4095 glc
 
 // GFX90A: flat_load_short_d16_hi a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x96,0xdc,0x02,0x00,0x80,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_load_short_d16_hi a5, v[2:3] offset:4095 slc
 
 // GFX90A: flat_atomic_swap a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x01,0xdd,0x02,0x02,0x80,0x00]
@@ -810,371 +810,371 @@ flat_atomic_inc_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
 flat_atomic_dec_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc
 
 // GFX90A: flat_atomic_swap v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x00,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_swap v[2:3], a2 offset:4095
 
 // GFX90A: flat_atomic_cmpswap v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x04,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_cmpswap v[2:3], a[2:3] offset:4095
 
 // GFX90A: flat_atomic_add v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x08,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_add v[2:3], a2 offset:4095
 
 // GFX90A: flat_atomic_sub v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_sub v[2:3], a2 offset:4095
 
 // GFX90A: flat_atomic_smin v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x10,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_smin v[2:3], a2 offset:4095
 
 // GFX90A: flat_atomic_umin v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x14,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_umin v[2:3], a2 offset:4095
 
 // GFX90A: flat_atomic_smax v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x18,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_smax v[2:3], a2 offset:4095
 
 // GFX90A: flat_atomic_umax v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_umax v[2:3], a2 offset:4095
 
 // GFX90A: flat_atomic_and v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x20,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_and v[2:3], a2 offset:4095
 
 // GFX90A: flat_atomic_or v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x24,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_or v[2:3], a2 offset:4095
 
 // GFX90A: flat_atomic_xor v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x28,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_xor v[2:3], a2 offset:4095
 
 // GFX90A: flat_atomic_inc v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_inc v[2:3], a2 offset:4095
 
 // GFX90A: flat_atomic_dec v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x30,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_dec v[2:3], a2 offset:4095
 
 // GFX90A: flat_atomic_swap_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x80,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_swap_x2 v[2:3], a[2:3] offset:4095
 
 // GFX90A: flat_atomic_cmpswap_x2 v[2:3], a[2:5] offset:4095 ; encoding: [0xff,0x0f,0x84,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_cmpswap_x2 v[2:3], a[2:5] offset:4095
 
 // GFX90A: flat_atomic_add_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x88,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_add_x2 v[2:3], a[2:3] offset:4095
 
 // GFX90A: flat_atomic_sub_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x8c,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_sub_x2 v[2:3], a[2:3] offset:4095
 
 // GFX90A: flat_atomic_smin_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x90,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_smin_x2 v[2:3], a[2:3] offset:4095
 
 // GFX90A: flat_atomic_umin_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x94,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_umin_x2 v[2:3], a[2:3] offset:4095
 
 // GFX90A: flat_atomic_smax_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x98,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_smax_x2 v[2:3], a[2:3] offset:4095
 
 // GFX90A: flat_atomic_umax_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x9c,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_umax_x2 v[2:3], a[2:3] offset:4095
 
 // GFX90A: flat_atomic_and_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0xa0,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_and_x2 v[2:3], a[2:3] offset:4095
 
 // GFX90A: flat_atomic_or_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0xa4,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_or_x2 v[2:3], a[2:3] offset:4095
 
 // GFX90A: flat_atomic_xor_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0xa8,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_xor_x2 v[2:3], a[2:3] offset:4095
 
 // GFX90A: flat_atomic_inc_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0xac,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_inc_x2 v[2:3], a[2:3] offset:4095
 
 // GFX90A: flat_atomic_dec_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0xb0,0xdd,0x02,0x02,0x80,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 flat_atomic_dec_x2 v[2:3], a[2:3] offset:4095
 
 // GFX90A: global_load_ubyte a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x40,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_ubyte a5, v[2:3], off offset:-1
 
 // GFX90A: global_load_ubyte a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x40,0xdc,0x02,0x00,0xff,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_ubyte a255, v[2:3], off offset:-1
 
 // GFX90A: global_load_ubyte a5, v[2:3], off ; encoding: [0x00,0x80,0x40,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_ubyte a5, v[2:3], off
 
 // GFX90A: global_load_sbyte a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x44,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_sbyte a5, v[2:3], off offset:-1
 
 // GFX90A: global_load_sbyte a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x44,0xdc,0x02,0x00,0xff,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_sbyte a255, v[2:3], off offset:-1
 
 // GFX90A: global_load_sbyte a5, v[2:3], off ; encoding: [0x00,0x80,0x44,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_sbyte a5, v[2:3], off
 
 // GFX90A: global_load_ushort a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x48,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_ushort a5, v[2:3], off offset:-1
 
 // GFX90A: global_load_ushort a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x48,0xdc,0x02,0x00,0xff,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_ushort a255, v[2:3], off offset:-1
 
 // GFX90A: global_load_ushort a5, v[2:3], off ; encoding: [0x00,0x80,0x48,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_ushort a5, v[2:3], off
 
 // GFX90A: global_load_sshort a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x4c,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_sshort a5, v[2:3], off offset:-1
 
 // GFX90A: global_load_sshort a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x4c,0xdc,0x02,0x00,0xff,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_sshort a255, v[2:3], off offset:-1
 
 // GFX90A: global_load_sshort a5, v[2:3], off ; encoding: [0x00,0x80,0x4c,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_sshort a5, v[2:3], off
 
 // GFX90A: global_load_dword a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x50,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_dword a5, v[2:3], off offset:-1
 
 // GFX90A: global_load_dword a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x50,0xdc,0x02,0x00,0xff,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_dword a255, v[2:3], off offset:-1
 
 // GFX90A: global_load_dword a5, v[2:3], off ; encoding: [0x00,0x80,0x50,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_dword a5, v[2:3], off
 
 // GFX90A: global_load_dwordx2 a[6:7], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x54,0xdc,0x02,0x00,0xff,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_dwordx2 a[6:7], v[2:3], off offset:-1
 
 // GFX90A: global_load_dwordx2 a[254:255], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x54,0xdc,0x02,0x00,0xff,0xfe]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_dwordx2 a[254:255], v[2:3], off offset:-1
 
 // GFX90A: global_load_dwordx2 a[6:7], v[2:3], off ; encoding: [0x00,0x80,0x54,0xdc,0x02,0x00,0xff,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_dwordx2 a[6:7], v[2:3], off
 
 // GFX90A: global_load_dwordx3 a[6:8], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x58,0xdc,0x02,0x00,0xff,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_dwordx3 a[6:8], v[2:3], off offset:-1
 
 // GFX90A: global_load_dwordx3 a[252:254], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x58,0xdc,0x02,0x00,0xff,0xfc]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_dwordx3 a[252:254], v[2:3], off offset:-1
 
 // GFX90A: global_load_dwordx3 a[6:8], v[2:3], off ; encoding: [0x00,0x80,0x58,0xdc,0x02,0x00,0xff,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_dwordx3 a[6:8], v[2:3], off
 
 // GFX90A: global_load_dwordx4 a[6:9], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x5c,0xdc,0x02,0x00,0xff,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_dwordx4 a[6:9], v[2:3], off offset:-1
 
 // GFX90A: global_load_dwordx4 a[252:255], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x5c,0xdc,0x02,0x00,0xff,0xfc]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_dwordx4 a[252:255], v[2:3], off offset:-1
 
 // GFX90A: global_load_dwordx4 a[6:9], v[2:3], off ; encoding: [0x00,0x80,0x5c,0xdc,0x02,0x00,0xff,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_dwordx4 a[6:9], v[2:3], off
 
 // GFX90A: global_store_byte v[2:3], a2, off offset:-1 ; encoding: [0xff,0x9f,0x60,0xdc,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_byte v[2:3], a2, off offset:-1
 
 // GFX90A: global_store_byte v[2:3], a255, off offset:-1 ; encoding: [0xff,0x9f,0x60,0xdc,0x02,0xff,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_byte v[2:3], a255, off offset:-1
 
 // GFX90A: global_store_byte v[2:3], a2, off ; encoding: [0x00,0x80,0x60,0xdc,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_byte v[2:3], a2, off
 
 // GFX90A: global_store_byte_d16_hi v[2:3], a2, off offset:-1 ; encoding: [0xff,0x9f,0x64,0xdc,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_byte_d16_hi v[2:3], a2, off offset:-1
 
 // GFX90A: global_store_byte_d16_hi v[2:3], a255, off offset:-1 ; encoding: [0xff,0x9f,0x64,0xdc,0x02,0xff,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_byte_d16_hi v[2:3], a255, off offset:-1
 
 // GFX90A: global_store_byte_d16_hi v[2:3], a2, off ; encoding: [0x00,0x80,0x64,0xdc,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_byte_d16_hi v[2:3], a2, off
 
 // GFX90A: global_store_short v[2:3], a2, off offset:-1 ; encoding: [0xff,0x9f,0x68,0xdc,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_short v[2:3], a2, off offset:-1
 
 // GFX90A: global_store_short v[2:3], a255, off offset:-1 ; encoding: [0xff,0x9f,0x68,0xdc,0x02,0xff,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_short v[2:3], a255, off offset:-1
 
 // GFX90A: global_store_short v[2:3], a2, off ; encoding: [0x00,0x80,0x68,0xdc,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_short v[2:3], a2, off
 
 // GFX90A: global_store_short_d16_hi v[2:3], a2, off offset:-1 ; encoding: [0xff,0x9f,0x6c,0xdc,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_short_d16_hi v[2:3], a2, off offset:-1
 
 // GFX90A: global_store_short_d16_hi v[2:3], a255, off offset:-1 ; encoding: [0xff,0x9f,0x6c,0xdc,0x02,0xff,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_short_d16_hi v[2:3], a255, off offset:-1
 
 // GFX90A: global_store_short_d16_hi v[2:3], a2, off ; encoding: [0x00,0x80,0x6c,0xdc,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_short_d16_hi v[2:3], a2, off
 
 // GFX90A: global_store_dword v[2:3], a2, off offset:-1 ; encoding: [0xff,0x9f,0x70,0xdc,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_dword v[2:3], a2, off offset:-1
 
 // GFX90A: global_store_dword v[2:3], a255, off offset:-1 ; encoding: [0xff,0x9f,0x70,0xdc,0x02,0xff,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_dword v[2:3], a255, off offset:-1
 
 // GFX90A: global_store_dword v[2:3], a2, off ; encoding: [0x00,0x80,0x70,0xdc,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_dword v[2:3], a2, off
 
 // GFX90A: global_store_dwordx2 v[2:3], a[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x74,0xdc,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_dwordx2 v[2:3], a[2:3], off offset:-1
 
 // GFX90A: global_store_dwordx2 v[2:3], a[254:255], off offset:-1 ; encoding: [0xff,0x9f,0x74,0xdc,0x02,0xfe,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_dwordx2 v[2:3], a[254:255], off offset:-1
 
 // GFX90A: global_store_dwordx2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x74,0xdc,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_dwordx2 v[2:3], a[2:3], off
 
 // GFX90A: global_store_dwordx3 v[2:3], a[2:4], off offset:-1 ; encoding: [0xff,0x9f,0x78,0xdc,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_dwordx3 v[2:3], a[2:4], off offset:-1
 
 // GFX90A: global_store_dwordx3 v[2:3], a[252:254], off offset:-1 ; encoding: [0xff,0x9f,0x78,0xdc,0x02,0xfc,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_dwordx3 v[2:3], a[252:254], off offset:-1
 
 // GFX90A: global_store_dwordx3 v[2:3], a[2:4], off ; encoding: [0x00,0x80,0x78,0xdc,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_dwordx3 v[2:3], a[2:4], off
 
 // GFX90A: global_store_dwordx4 v[2:3], a[2:5], off offset:-1 ; encoding: [0xff,0x9f,0x7c,0xdc,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_dwordx4 v[2:3], a[2:5], off offset:-1
 
 // GFX90A: global_store_dwordx4 v[2:3], a[252:255], off offset:-1 ; encoding: [0xff,0x9f,0x7c,0xdc,0x02,0xfc,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_dwordx4 v[2:3], a[252:255], off offset:-1
 
 // GFX90A: global_store_dwordx4 v[2:3], a[2:5], off ; encoding: [0x00,0x80,0x7c,0xdc,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_store_dwordx4 v[2:3], a[2:5], off
 
 // GFX90A: global_load_ubyte_d16 a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x80,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_ubyte_d16 a5, v[2:3], off offset:-1
 
 // GFX90A: global_load_ubyte_d16 a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x80,0xdc,0x02,0x00,0xff,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_ubyte_d16 a255, v[2:3], off offset:-1
 
 // GFX90A: global_load_ubyte_d16 a5, v[2:3], off ; encoding: [0x00,0x80,0x80,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_ubyte_d16 a5, v[2:3], off
 
 // GFX90A: global_load_ubyte_d16_hi a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x84,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_ubyte_d16_hi a5, v[2:3], off offset:-1
 
 // GFX90A: global_load_ubyte_d16_hi a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x84,0xdc,0x02,0x00,0xff,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_ubyte_d16_hi a255, v[2:3], off offset:-1
 
 // GFX90A: global_load_ubyte_d16_hi a5, v[2:3], off ; encoding: [0x00,0x80,0x84,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_ubyte_d16_hi a5, v[2:3], off
 
 // GFX90A: global_load_sbyte_d16 a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x88,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_sbyte_d16 a5, v[2:3], off offset:-1
 
 // GFX90A: global_load_sbyte_d16 a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x88,0xdc,0x02,0x00,0xff,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_sbyte_d16 a255, v[2:3], off offset:-1
 
 // GFX90A: global_load_sbyte_d16 a5, v[2:3], off ; encoding: [0x00,0x80,0x88,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_sbyte_d16 a5, v[2:3], off
 
 // GFX90A: global_load_sbyte_d16_hi a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x8c,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_sbyte_d16_hi a5, v[2:3], off offset:-1
 
 // GFX90A: global_load_sbyte_d16_hi a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x8c,0xdc,0x02,0x00,0xff,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_sbyte_d16_hi a255, v[2:3], off offset:-1
 
 // GFX90A: global_load_sbyte_d16_hi a5, v[2:3], off ; encoding: [0x00,0x80,0x8c,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_sbyte_d16_hi a5, v[2:3], off
 
 // GFX90A: global_load_short_d16 a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x90,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_short_d16 a5, v[2:3], off offset:-1
 
 // GFX90A: global_load_short_d16 a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x90,0xdc,0x02,0x00,0xff,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_short_d16 a255, v[2:3], off offset:-1
 
 // GFX90A: global_load_short_d16 a5, v[2:3], off ; encoding: [0x00,0x80,0x90,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_short_d16 a5, v[2:3], off
 
 // GFX90A: global_load_short_d16_hi a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x94,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_short_d16_hi a5, v[2:3], off offset:-1
 
 // GFX90A: global_load_short_d16_hi a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x94,0xdc,0x02,0x00,0xff,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_short_d16_hi a255, v[2:3], off offset:-1
 
 // GFX90A: global_load_short_d16_hi a5, v[2:3], off ; encoding: [0x00,0x80,0x94,0xdc,0x02,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_load_short_d16_hi a5, v[2:3], off
 
 // GFX90A: global_atomic_swap a1, v[2:3], a2, off glc ; encoding: [0x00,0x80,0x01,0xdd,0x02,0x02,0xff,0x01]
@@ -1282,5815 +1282,5815 @@ global_atomic_inc_x2 a[2:3], v[2:3], a[2:3], off glc
 global_atomic_dec_x2 a[2:3], v[2:3], a[2:3], off glc
 
 // GFX90A: global_atomic_swap v[2:3], a2, off ; encoding: [0x00,0x80,0x00,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_swap v[2:3], a2, off
 
 // GFX90A: global_atomic_cmpswap v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x04,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_cmpswap v[2:3], a[2:3], off
 
 // GFX90A: global_atomic_add v[2:3], a2, off ; encoding: [0x00,0x80,0x08,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_add v[2:3], a2, off
 
 // GFX90A: global_atomic_sub v[2:3], a2, off ; encoding: [0x00,0x80,0x0c,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_sub v[2:3], a2, off
 
 // GFX90A: global_atomic_smin v[2:3], a2, off ; encoding: [0x00,0x80,0x10,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_smin v[2:3], a2, off
 
 // GFX90A: global_atomic_umin v[2:3], a2, off ; encoding: [0x00,0x80,0x14,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_umin v[2:3], a2, off
 
 // GFX90A: global_atomic_smax v[2:3], a2, off ; encoding: [0x00,0x80,0x18,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_smax v[2:3], a2, off
 
 // GFX90A: global_atomic_umax v[2:3], a2, off ; encoding: [0x00,0x80,0x1c,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_umax v[2:3], a2, off
 
 // GFX90A: global_atomic_and v[2:3], a2, off ; encoding: [0x00,0x80,0x20,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_and v[2:3], a2, off
 
 // GFX90A: global_atomic_or v[2:3], a2, off ; encoding: [0x00,0x80,0x24,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_or v[2:3], a2, off
 
 // GFX90A: global_atomic_xor v[2:3], a2, off ; encoding: [0x00,0x80,0x28,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_xor v[2:3], a2, off
 
 // GFX90A: global_atomic_inc v[2:3], a2, off ; encoding: [0x00,0x80,0x2c,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_inc v[2:3], a2, off
 
 // GFX90A: global_atomic_dec v[2:3], a2, off ; encoding: [0x00,0x80,0x30,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_dec v[2:3], a2, off
 
 // GFX90A: global_atomic_swap_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x80,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_swap_x2 v[2:3], a[2:3], off
 
 // GFX90A: global_atomic_cmpswap_x2 v[2:3], a[2:5], off ; encoding: [0x00,0x80,0x84,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_cmpswap_x2 v[2:3], a[2:5], off
 
 // GFX90A: global_atomic_add_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x88,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_add_x2 v[2:3], a[2:3], off
 
 // GFX90A: global_atomic_sub_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x8c,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_sub_x2 v[2:3], a[2:3], off
 
 // GFX90A: global_atomic_smin_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x90,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_smin_x2 v[2:3], a[2:3], off
 
 // GFX90A: global_atomic_umin_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x94,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_umin_x2 v[2:3], a[2:3], off
 
 // GFX90A: global_atomic_smax_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x98,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_smax_x2 v[2:3], a[2:3], off
 
 // GFX90A: global_atomic_umax_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x9c,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_umax_x2 v[2:3], a[2:3], off
 
 // GFX90A: global_atomic_and_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0xa0,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_and_x2 v[2:3], a[2:3], off
 
 // GFX90A: global_atomic_or_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0xa4,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_or_x2 v[2:3], a[2:3], off
 
 // GFX90A: global_atomic_xor_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0xa8,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_xor_x2 v[2:3], a[2:3], off
 
 // GFX90A: global_atomic_inc_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0xac,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_inc_x2 v[2:3], a[2:3], off
 
 // GFX90A: global_atomic_dec_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0xb0,0xdd,0x02,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 global_atomic_dec_x2 v[2:3], a[2:3], off
 
 // GFX90A: scratch_load_ubyte a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte a5, off, s2 offset:-1
 
 // GFX90A: scratch_load_ubyte a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte a255, off, s2 offset:-1
 
 // GFX90A: scratch_load_ubyte a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0xe5,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte a5, off, s101 offset:-1
 
 // GFX90A: scratch_load_ubyte a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0xe6,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte a5, off, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_load_ubyte a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0xe7,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte a5, off, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_load_ubyte a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0xea,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte a5, off, vcc_lo offset:-1
 
 // GFX90A: scratch_load_ubyte a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0xeb,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte a5, off, vcc_hi offset:-1
 
 // GFX90A: scratch_load_ubyte a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte a5, v0, off offset:-1
 
 // GFX90A: scratch_load_ubyte a5, off, s2  ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte a5, off, s2
 
 // GFX90A: scratch_load_ubyte a5, off, s2  ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte a5, off, s2
 
 // GFX90A: scratch_load_ubyte a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x40,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte a5, off, s2 offset:4095
 
 // GFX90A: scratch_load_ubyte a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x40,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte a5, off, s2 offset:-4096
 
 // GFX90A: scratch_load_ubyte a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte a5, off, s2 offset:-1 glc
 
 // GFX90A: scratch_load_ubyte a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte a5, off, s2 offset:-1 slc
 
 // GFX90A: scratch_load_sbyte a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte a5, off, s2 offset:-1
 
 // GFX90A: scratch_load_sbyte a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte a255, off, s2 offset:-1
 
 // GFX90A: scratch_load_sbyte a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0xe5,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte a5, off, s101 offset:-1
 
 // GFX90A: scratch_load_sbyte a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0xe6,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte a5, off, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_load_sbyte a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0xe7,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte a5, off, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_load_sbyte a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0xea,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte a5, off, vcc_lo offset:-1
 
 // GFX90A: scratch_load_sbyte a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0xeb,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte a5, off, vcc_hi offset:-1
 
 // GFX90A: scratch_load_sbyte a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte a5, v0, off offset:-1
 
 // GFX90A: scratch_load_sbyte a5, off, s2  ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte a5, off, s2
 
 // GFX90A: scratch_load_sbyte a5, off, s2  ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte a5, off, s2
 
 // GFX90A: scratch_load_sbyte a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x44,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte a5, off, s2 offset:4095
 
 // GFX90A: scratch_load_sbyte a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x44,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte a5, off, s2 offset:-4096
 
 // GFX90A: scratch_load_sbyte a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte a5, off, s2 offset:-1 glc
 
 // GFX90A: scratch_load_sbyte a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte a5, off, s2 offset:-1 slc
 
 // GFX90A: scratch_load_ushort a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ushort a5, off, s2 offset:-1
 
 // GFX90A: scratch_load_ushort a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ushort a255, off, s2 offset:-1
 
 // GFX90A: scratch_load_ushort a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0xe5,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ushort a5, off, s101 offset:-1
 
 // GFX90A: scratch_load_ushort a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0xe6,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ushort a5, off, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_load_ushort a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0xe7,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ushort a5, off, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_load_ushort a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0xea,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ushort a5, off, vcc_lo offset:-1
 
 // GFX90A: scratch_load_ushort a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0xeb,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ushort a5, off, vcc_hi offset:-1
 
 // GFX90A: scratch_load_ushort a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ushort a5, v0, off offset:-1
 
 // GFX90A: scratch_load_ushort a5, off, s2 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ushort a5, off, s2
 
 // GFX90A: scratch_load_ushort a5, off, s2 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ushort a5, off, s2
 
 // GFX90A: scratch_load_ushort a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x48,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ushort a5, off, s2 offset:4095
 
 // GFX90A: scratch_load_ushort a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x48,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ushort a5, off, s2 offset:-4096
 
 // GFX90A: scratch_load_ushort a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ushort a5, off, s2 offset:-1 glc
 
 // GFX90A: scratch_load_ushort a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ushort a5, off, s2 offset:-1 slc
 
 // GFX90A: scratch_load_sshort a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sshort a5, off, s2 offset:-1
 
 // GFX90A: scratch_load_sshort a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sshort a255, off, s2 offset:-1
 
 // GFX90A: scratch_load_sshort a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0xe5,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sshort a5, off, s101 offset:-1
 
 // GFX90A: scratch_load_sshort a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0xe6,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sshort a5, off, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_load_sshort a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0xe7,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sshort a5, off, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_load_sshort a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0xea,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sshort a5, off, vcc_lo offset:-1
 
 // GFX90A: scratch_load_sshort a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0xeb,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sshort a5, off, vcc_hi offset:-1
 
 // GFX90A: scratch_load_sshort a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sshort a5, v0, off offset:-1
 
 // GFX90A: scratch_load_sshort a5, off, s2 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sshort a5, off, s2
 
 // GFX90A: scratch_load_sshort a5, off, s2 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sshort a5, off, s2
 
 // GFX90A: scratch_load_sshort a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x4c,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sshort a5, off, s2 offset:4095
 
 // GFX90A: scratch_load_sshort a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x4c,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sshort a5, off, s2 offset:-4096
 
 // GFX90A: scratch_load_sshort a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sshort a5, off, s2 offset:-1 glc
 
 // GFX90A: scratch_load_sshort a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sshort a5, off, s2 offset:-1 slc
 
 // GFX90A: scratch_load_dword a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dword a5, off, s2 offset:-1
 
 // GFX90A: scratch_load_dword a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dword a255, off, s2 offset:-1
 
 // GFX90A: scratch_load_dword a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0xe5,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dword a5, off, s101 offset:-1
 
 // GFX90A: scratch_load_dword a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0xe6,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dword a5, off, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_load_dword a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0xe7,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dword a5, off, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_load_dword a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0xea,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dword a5, off, vcc_lo offset:-1
 
 // GFX90A: scratch_load_dword a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0xeb,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dword a5, off, vcc_hi offset:-1
 
 // GFX90A: scratch_load_dword a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dword a5, v0, off offset:-1
 
 // GFX90A: scratch_load_dword a5, off, s2  ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dword a5, off, s2
 
 // GFX90A: scratch_load_dword a5, off, s2  ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dword a5, off, s2
 
 // GFX90A: scratch_load_dword a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x50,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dword a5, off, s2 offset:4095
 
 // GFX90A: scratch_load_dword a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x50,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dword a5, off, s2 offset:-4096
 
 // GFX90A: scratch_load_dword a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dword a5, off, s2 offset:-1 glc
 
 // GFX90A: scratch_load_dword a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dword a5, off, s2 offset:-1 slc
 
 // GFX90A: scratch_load_dwordx2 a[6:7], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx2 a[6:7], off, s2 offset:-1
 
 // GFX90A: scratch_load_dwordx2 a[254:255], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0xfe]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx2 a[254:255], off, s2 offset:-1
 
 // GFX90A: scratch_load_dwordx2 a[6:7], off, s101 offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0xe5,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx2 a[6:7], off, s101 offset:-1
 
 // GFX90A: scratch_load_dwordx2 a[6:7], off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0xe6,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx2 a[6:7], off, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_load_dwordx2 a[6:7], off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0xe7,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx2 a[6:7], off, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_load_dwordx2 a[6:7], off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0xea,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx2 a[6:7], off, vcc_lo offset:-1
 
 // GFX90A: scratch_load_dwordx2 a[6:7], off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0xeb,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx2 a[6:7], off, vcc_hi offset:-1
 
 // GFX90A: scratch_load_dwordx2 a[6:7], v0, off offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0xff,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx2 a[6:7], v0, off offset:-1
 
 // GFX90A: scratch_load_dwordx2 a[6:7], off, s2 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx2 a[6:7], off, s2
 
 // GFX90A: scratch_load_dwordx2 a[6:7], off, s2 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx2 a[6:7], off, s2
 
 // GFX90A: scratch_load_dwordx2 a[6:7], off, s2 offset:4095 ; encoding: [0xff,0x4f,0x54,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx2 a[6:7], off, s2 offset:4095
 
 // GFX90A: scratch_load_dwordx2 a[6:7], off, s2 offset:-4096 ; encoding: [0x00,0x50,0x54,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx2 a[6:7], off, s2 offset:-4096
 
 // GFX90A: scratch_load_dwordx2 a[6:7], off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx2 a[6:7], off, s2 offset:-1 glc
 
 // GFX90A: scratch_load_dwordx2 a[6:7], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx2 a[6:7], off, s2 offset:-1 slc
 
 // GFX90A: scratch_load_dwordx3 a[6:8], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx3 a[6:8], off, s2 offset:-1
 
 // GFX90A: scratch_load_dwordx3 a[252:254], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0xfc]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx3 a[252:254], off, s2 offset:-1
 
 // GFX90A: scratch_load_dwordx3 a[6:8], off, s101 offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0xe5,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx3 a[6:8], off, s101 offset:-1
 
 // GFX90A: scratch_load_dwordx3 a[6:8], off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0xe6,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx3 a[6:8], off, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_load_dwordx3 a[6:8], off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0xe7,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx3 a[6:8], off, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_load_dwordx3 a[6:8], off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0xea,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx3 a[6:8], off, vcc_lo offset:-1
 
 // GFX90A: scratch_load_dwordx3 a[6:8], off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0xeb,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx3 a[6:8], off, vcc_hi offset:-1
 
 // GFX90A: scratch_load_dwordx3 a[6:8], v0, off offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0xff,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx3 a[6:8], v0, off offset:-1
 
 // GFX90A: scratch_load_dwordx3 a[6:8], off, s2 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx3 a[6:8], off, s2
 
 // GFX90A: scratch_load_dwordx3 a[6:8], off, s2 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx3 a[6:8], off, s2
 
 // GFX90A: scratch_load_dwordx3 a[6:8], off, s2 offset:4095 ; encoding: [0xff,0x4f,0x58,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx3 a[6:8], off, s2 offset:4095
 
 // GFX90A: scratch_load_dwordx3 a[6:8], off, s2 offset:-4096 ; encoding: [0x00,0x50,0x58,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx3 a[6:8], off, s2 offset:-4096
 
 // GFX90A: scratch_load_dwordx3 a[6:8], off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx3 a[6:8], off, s2 offset:-1 glc
 
 // GFX90A: scratch_load_dwordx3 a[6:8], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx3 a[6:8], off, s2 offset:-1 slc
 
 // GFX90A: scratch_load_dwordx4 a[6:9], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx4 a[6:9], off, s2 offset:-1
 
 // GFX90A: scratch_load_dwordx4 a[252:255], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0xfc]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx4 a[252:255], off, s2 offset:-1
 
 // GFX90A: scratch_load_dwordx4 a[6:9], off, s101 offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0xe5,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx4 a[6:9], off, s101 offset:-1
 
 // GFX90A: scratch_load_dwordx4 a[6:9], off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0xe6,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx4 a[6:9], off, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_load_dwordx4 a[6:9], off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0xe7,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx4 a[6:9], off, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_load_dwordx4 a[6:9], off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0xea,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx4 a[6:9], off, vcc_lo offset:-1
 
 // GFX90A: scratch_load_dwordx4 a[6:9], off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0xeb,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx4 a[6:9], off, vcc_hi offset:-1
 
 // GFX90A: scratch_load_dwordx4 a[6:9], v0, off offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0xff,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx4 a[6:9], v0, off offset:-1
 
 // GFX90A: scratch_load_dwordx4 a[6:9], off, s2 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx4 a[6:9], off, s2
 
 // GFX90A: scratch_load_dwordx4 a[6:9], off, s2 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx4 a[6:9], off, s2
 
 // GFX90A: scratch_load_dwordx4 a[6:9], off, s2 offset:4095 ; encoding: [0xff,0x4f,0x5c,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx4 a[6:9], off, s2 offset:4095
 
 // GFX90A: scratch_load_dwordx4 a[6:9], off, s2 offset:-4096 ; encoding: [0x00,0x50,0x5c,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx4 a[6:9], off, s2 offset:-4096
 
 // GFX90A: scratch_load_dwordx4 a[6:9], off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx4 a[6:9], off, s2 offset:-1 glc
 
 // GFX90A: scratch_load_dwordx4 a[6:9], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_dwordx4 a[6:9], off, s2 offset:-1 slc
 
 // GFX90A: scratch_store_byte off, a2, s3 offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte off, a2, s3 offset:-1
 
 // GFX90A: scratch_store_byte off, a255, s3 offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0xff,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte off, a255, s3 offset:-1
 
 // GFX90A: scratch_store_byte off, a2, s101 offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0xe5,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte off, a2, s101 offset:-1
 
 // GFX90A: scratch_store_byte off, a2, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0xe6,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte off, a2, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_store_byte off, a2, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0xe7,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte off, a2, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_store_byte off, a2, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0xea,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte off, a2, vcc_lo offset:-1
 
 // GFX90A: scratch_store_byte off, a2, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0xeb,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte off, a2, vcc_hi offset:-1
 
 // GFX90A: scratch_store_byte v0, a2, off offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte v0, a2, off offset:-1
 
 // GFX90A: scratch_store_byte off, a2, s3  ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte off, a2, s3
 
 // GFX90A: scratch_store_byte off, a2, s3  ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte off, a2, s3
 
 // GFX90A: scratch_store_byte off, a2, s3 offset:4095 ; encoding: [0xff,0x4f,0x60,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte off, a2, s3 offset:4095
 
 // GFX90A: scratch_store_byte off, a2, s3 offset:-4096 ; encoding: [0x00,0x50,0x60,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte off, a2, s3 offset:-4096
 
 // GFX90A: scratch_store_byte off, a2, s3 offset:-1 glc ; encoding: [0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte off, a2, s3 offset:-1 glc
 
 // GFX90A: scratch_store_byte off, a2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte off, a2, s3 offset:-1 slc
 
 // GFX90A: scratch_store_byte_d16_hi off, a2, s3 offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte_d16_hi off, a2, s3 offset:-1
 
 // GFX90A: scratch_store_byte_d16_hi off, a255, s3 offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0xff,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte_d16_hi off, a255, s3 offset:-1
 
 // GFX90A: scratch_store_byte_d16_hi off, a2, s101 offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0xe5,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte_d16_hi off, a2, s101 offset:-1
 
 // GFX90A: scratch_store_byte_d16_hi off, a2, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0xe6,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte_d16_hi off, a2, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_store_byte_d16_hi off, a2, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0xe7,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte_d16_hi off, a2, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_store_byte_d16_hi off, a2, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0xea,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte_d16_hi off, a2, vcc_lo offset:-1
 
 // GFX90A: scratch_store_byte_d16_hi off, a2, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0xeb,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte_d16_hi off, a2, vcc_hi offset:-1
 
 // GFX90A: scratch_store_byte_d16_hi v0, a2, off offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte_d16_hi v0, a2, off offset:-1
 
 // GFX90A: scratch_store_byte_d16_hi off, a2, s3 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte_d16_hi off, a2, s3
 
 // GFX90A: scratch_store_byte_d16_hi off, a2, s3 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte_d16_hi off, a2, s3
 
 // GFX90A: scratch_store_byte_d16_hi off, a2, s3 offset:4095 ; encoding: [0xff,0x4f,0x64,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte_d16_hi off, a2, s3 offset:4095
 
 // GFX90A: scratch_store_byte_d16_hi off, a2, s3 offset:-4096 ; encoding: [0x00,0x50,0x64,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte_d16_hi off, a2, s3 offset:-4096
 
 // GFX90A: scratch_store_byte_d16_hi off, a2, s3 offset:-1 glc ; encoding: [0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte_d16_hi off, a2, s3 offset:-1 glc
 
 // GFX90A: scratch_store_byte_d16_hi off, a2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_byte_d16_hi off, a2, s3 offset:-1 slc
 
 // GFX90A: scratch_store_short off, a2, s3 offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short off, a2, s3 offset:-1
 
 // GFX90A: scratch_store_short off, a255, s3 offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0xff,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short off, a255, s3 offset:-1
 
 // GFX90A: scratch_store_short off, a2, s101 offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0xe5,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short off, a2, s101 offset:-1
 
 // GFX90A: scratch_store_short off, a2, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0xe6,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short off, a2, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_store_short off, a2, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0xe7,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short off, a2, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_store_short off, a2, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0xea,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short off, a2, vcc_lo offset:-1
 
 // GFX90A: scratch_store_short off, a2, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0xeb,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short off, a2, vcc_hi offset:-1
 
 // GFX90A: scratch_store_short v0, a2, off offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short v0, a2, off offset:-1
 
 // GFX90A: scratch_store_short off, a2, s3 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short off, a2, s3
 
 // GFX90A: scratch_store_short off, a2, s3 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short off, a2, s3
 
 // GFX90A: scratch_store_short off, a2, s3 offset:4095 ; encoding: [0xff,0x4f,0x68,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short off, a2, s3 offset:4095
 
 // GFX90A: scratch_store_short off, a2, s3 offset:-4096 ; encoding: [0x00,0x50,0x68,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short off, a2, s3 offset:-4096
 
 // GFX90A: scratch_store_short off, a2, s3 offset:-1 glc ; encoding: [0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short off, a2, s3 offset:-1 glc
 
 // GFX90A: scratch_store_short off, a2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short off, a2, s3 offset:-1 slc
 
 // GFX90A: scratch_store_short_d16_hi off, a2, s3 offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short_d16_hi off, a2, s3 offset:-1
 
 // GFX90A: scratch_store_short_d16_hi off, a255, s3 offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0xff,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short_d16_hi off, a255, s3 offset:-1
 
 // GFX90A: scratch_store_short_d16_hi off, a2, s101 offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0xe5,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short_d16_hi off, a2, s101 offset:-1
 
 // GFX90A: scratch_store_short_d16_hi off, a2, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0xe6,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short_d16_hi off, a2, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_store_short_d16_hi off, a2, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0xe7,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short_d16_hi off, a2, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_store_short_d16_hi off, a2, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0xea,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short_d16_hi off, a2, vcc_lo offset:-1
 
 // GFX90A: scratch_store_short_d16_hi off, a2, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0xeb,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short_d16_hi off, a2, vcc_hi offset:-1
 
 // GFX90A: scratch_store_short_d16_hi v0, a2, off offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short_d16_hi v0, a2, off offset:-1
 
 // GFX90A: scratch_store_short_d16_hi off, a2, s3 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short_d16_hi off, a2, s3
 
 // GFX90A: scratch_store_short_d16_hi off, a2, s3 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short_d16_hi off, a2, s3
 
 // GFX90A: scratch_store_short_d16_hi off, a2, s3 offset:4095 ; encoding: [0xff,0x4f,0x6c,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short_d16_hi off, a2, s3 offset:4095
 
 // GFX90A: scratch_store_short_d16_hi off, a2, s3 offset:-4096 ; encoding: [0x00,0x50,0x6c,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short_d16_hi off, a2, s3 offset:-4096
 
 // GFX90A: scratch_store_short_d16_hi off, a2, s3 offset:-1 glc ; encoding: [0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short_d16_hi off, a2, s3 offset:-1 glc
 
 // GFX90A: scratch_store_short_d16_hi off, a2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_short_d16_hi off, a2, s3 offset:-1 slc
 
 // GFX90A: scratch_store_dword off, a2, s3 offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dword off, a2, s3 offset:-1
 
 // GFX90A: scratch_store_dword off, a255, s3 offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0xff,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dword off, a255, s3 offset:-1
 
 // GFX90A: scratch_store_dword off, a2, s101 offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0xe5,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dword off, a2, s101 offset:-1
 
 // GFX90A: scratch_store_dword off, a2, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0xe6,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dword off, a2, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_store_dword off, a2, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0xe7,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dword off, a2, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_store_dword off, a2, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0xea,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dword off, a2, vcc_lo offset:-1
 
 // GFX90A: scratch_store_dword off, a2, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0xeb,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dword off, a2, vcc_hi offset:-1
 
 // GFX90A: scratch_store_dword v0, a2, off offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dword v0, a2, off offset:-1
 
 // GFX90A: scratch_store_dword off, a2, s3 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dword off, a2, s3
 
 // GFX90A: scratch_store_dword off, a2, s3 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dword off, a2, s3
 
 // GFX90A: scratch_store_dword off, a2, s3 offset:4095 ; encoding: [0xff,0x4f,0x70,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dword off, a2, s3 offset:4095
 
 // GFX90A: scratch_store_dword off, a2, s3 offset:-4096 ; encoding: [0x00,0x50,0x70,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dword off, a2, s3 offset:-4096
 
 // GFX90A: scratch_store_dword off, a2, s3 offset:-1 glc ; encoding: [0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dword off, a2, s3 offset:-1 glc
 
 // GFX90A: scratch_store_dword off, a2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dword off, a2, s3 offset:-1 slc
 
 // GFX90A: scratch_store_dwordx2 off, a[2:3], s3 offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx2 off, a[2:3], s3 offset:-1
 
 // GFX90A: scratch_store_dwordx2 off, a[254:255], s3 offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0xfe,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx2 off, a[254:255], s3 offset:-1
 
 // GFX90A: scratch_store_dwordx2 off, a[2:3], s101 offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0xe5,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx2 off, a[2:3], s101 offset:-1
 
 // GFX90A: scratch_store_dwordx2 off, a[2:3], flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0xe6,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx2 off, a[2:3], flat_scratch_lo offset:-1
 
 // GFX90A: scratch_store_dwordx2 off, a[2:3], flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0xe7,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx2 off, a[2:3], flat_scratch_hi offset:-1
 
 // GFX90A: scratch_store_dwordx2 off, a[2:3], vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0xea,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx2 off, a[2:3], vcc_lo offset:-1
 
 // GFX90A: scratch_store_dwordx2 off, a[2:3], vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0xeb,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx2 off, a[2:3], vcc_hi offset:-1
 
 // GFX90A: scratch_store_dwordx2 v0, a[2:3], off offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx2 v0, a[2:3], off offset:-1
 
 // GFX90A: scratch_store_dwordx2 off, a[2:3], s3 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx2 off, a[2:3], s3
 
 // GFX90A: scratch_store_dwordx2 off, a[2:3], s3 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx2 off, a[2:3], s3
 
 // GFX90A: scratch_store_dwordx2 off, a[2:3], s3 offset:4095 ; encoding: [0xff,0x4f,0x74,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx2 off, a[2:3], s3 offset:4095
 
 // GFX90A: scratch_store_dwordx2 off, a[2:3], s3 offset:-4096 ; encoding: [0x00,0x50,0x74,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx2 off, a[2:3], s3 offset:-4096
 
 // GFX90A: scratch_store_dwordx2 off, a[2:3], s3 offset:-1 glc ; encoding: [0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx2 off, a[2:3], s3 offset:-1 glc
 
 // GFX90A: scratch_store_dwordx2 off, a[2:3], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx2 off, a[2:3], s3 offset:-1 slc
 
 // GFX90A: scratch_store_dwordx3 off, a[2:4], s3 offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx3 off, a[2:4], s3 offset:-1
 
 // GFX90A: scratch_store_dwordx3 off, a[252:254], s3 offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0xfc,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx3 off, a[252:254], s3 offset:-1
 
 // GFX90A: scratch_store_dwordx3 off, a[2:4], s101 offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0xe5,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx3 off, a[2:4], s101 offset:-1
 
 // GFX90A: scratch_store_dwordx3 off, a[2:4], flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0xe6,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx3 off, a[2:4], flat_scratch_lo offset:-1
 
 // GFX90A: scratch_store_dwordx3 off, a[2:4], flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0xe7,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx3 off, a[2:4], flat_scratch_hi offset:-1
 
 // GFX90A: scratch_store_dwordx3 off, a[2:4], vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0xea,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx3 off, a[2:4], vcc_lo offset:-1
 
 // GFX90A: scratch_store_dwordx3 off, a[2:4], vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0xeb,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx3 off, a[2:4], vcc_hi offset:-1
 
 // GFX90A: scratch_store_dwordx3 v0, a[2:4], off offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx3 v0, a[2:4], off offset:-1
 
 // GFX90A: scratch_store_dwordx3 off, a[2:4], s3 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx3 off, a[2:4], s3
 
 // GFX90A: scratch_store_dwordx3 off, a[2:4], s3 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx3 off, a[2:4], s3
 
 // GFX90A: scratch_store_dwordx3 off, a[2:4], s3 offset:4095 ; encoding: [0xff,0x4f,0x78,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx3 off, a[2:4], s3 offset:4095
 
 // GFX90A: scratch_store_dwordx3 off, a[2:4], s3 offset:-4096 ; encoding: [0x00,0x50,0x78,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx3 off, a[2:4], s3 offset:-4096
 
 // GFX90A: scratch_store_dwordx3 off, a[2:4], s3 offset:-1 glc ; encoding: [0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx3 off, a[2:4], s3 offset:-1 glc
 
 // GFX90A: scratch_store_dwordx3 off, a[2:4], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx3 off, a[2:4], s3 offset:-1 slc
 
 // GFX90A: scratch_store_dwordx4 off, a[2:5], s3 offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx4 off, a[2:5], s3 offset:-1
 
 // GFX90A: scratch_store_dwordx4 off, a[252:255], s3 offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0xfc,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx4 off, a[252:255], s3 offset:-1
 
 // GFX90A: scratch_store_dwordx4 off, a[2:5], s101 offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0xe5,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx4 off, a[2:5], s101 offset:-1
 
 // GFX90A: scratch_store_dwordx4 off, a[2:5], flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0xe6,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx4 off, a[2:5], flat_scratch_lo offset:-1
 
 // GFX90A: scratch_store_dwordx4 off, a[2:5], flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0xe7,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx4 off, a[2:5], flat_scratch_hi offset:-1
 
 // GFX90A: scratch_store_dwordx4 off, a[2:5], vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0xea,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx4 off, a[2:5], vcc_lo offset:-1
 
 // GFX90A: scratch_store_dwordx4 off, a[2:5], vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0xeb,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx4 off, a[2:5], vcc_hi offset:-1
 
 // GFX90A: scratch_store_dwordx4 v0, a[2:5], off offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0xff,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx4 v0, a[2:5], off offset:-1
 
 // GFX90A: scratch_store_dwordx4 off, a[2:5], s3 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx4 off, a[2:5], s3
 
 // GFX90A: scratch_store_dwordx4 off, a[2:5], s3 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx4 off, a[2:5], s3
 
 // GFX90A: scratch_store_dwordx4 off, a[2:5], s3 offset:4095 ; encoding: [0xff,0x4f,0x7c,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx4 off, a[2:5], s3 offset:4095
 
 // GFX90A: scratch_store_dwordx4 off, a[2:5], s3 offset:-4096 ; encoding: [0x00,0x50,0x7c,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx4 off, a[2:5], s3 offset:-4096
 
 // GFX90A: scratch_store_dwordx4 off, a[2:5], s3 offset:-1 glc ; encoding: [0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx4 off, a[2:5], s3 offset:-1 glc
 
 // GFX90A: scratch_store_dwordx4 off, a[2:5], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_store_dwordx4 off, a[2:5], s3 offset:-1 slc
 
 // GFX90A: scratch_load_ubyte_d16 a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16 a5, off, s2 offset:-1
 
 // GFX90A: scratch_load_ubyte_d16 a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16 a255, off, s2 offset:-1
 
 // GFX90A: scratch_load_ubyte_d16 a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0xe5,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16 a5, off, s101 offset:-1
 
 // GFX90A: scratch_load_ubyte_d16 a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0xe6,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16 a5, off, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_load_ubyte_d16 a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0xe7,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16 a5, off, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_load_ubyte_d16 a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0xea,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16 a5, off, vcc_lo offset:-1
 
 // GFX90A: scratch_load_ubyte_d16 a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0xeb,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16 a5, off, vcc_hi offset:-1
 
 // GFX90A: scratch_load_ubyte_d16 a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16 a5, v0, off offset:-1
 
 // GFX90A: scratch_load_ubyte_d16 a5, off, s2 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16 a5, off, s2
 
 // GFX90A: scratch_load_ubyte_d16 a5, off, s2 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16 a5, off, s2
 
 // GFX90A: scratch_load_ubyte_d16 a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x80,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16 a5, off, s2 offset:4095
 
 // GFX90A: scratch_load_ubyte_d16 a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x80,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16 a5, off, s2 offset:-4096
 
 // GFX90A: scratch_load_ubyte_d16 a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16 a5, off, s2 offset:-1 glc
 
 // GFX90A: scratch_load_ubyte_d16 a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16 a5, off, s2 offset:-1 slc
 
 // GFX90A: scratch_load_ubyte_d16_hi a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16_hi a5, off, s2 offset:-1
 
 // GFX90A: scratch_load_ubyte_d16_hi a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16_hi a255, off, s2 offset:-1
 
 // GFX90A: scratch_load_ubyte_d16_hi a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0xe5,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16_hi a5, off, s101 offset:-1
 
 // GFX90A: scratch_load_ubyte_d16_hi a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0xe6,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16_hi a5, off, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_load_ubyte_d16_hi a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0xe7,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16_hi a5, off, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_load_ubyte_d16_hi a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0xea,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16_hi a5, off, vcc_lo offset:-1
 
 // GFX90A: scratch_load_ubyte_d16_hi a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0xeb,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16_hi a5, off, vcc_hi offset:-1
 
 // GFX90A: scratch_load_ubyte_d16_hi a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16_hi a5, v0, off offset:-1
 
 // GFX90A: scratch_load_ubyte_d16_hi a5, off, s2 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16_hi a5, off, s2
 
 // GFX90A: scratch_load_ubyte_d16_hi a5, off, s2 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16_hi a5, off, s2
 
 // GFX90A: scratch_load_ubyte_d16_hi a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x84,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16_hi a5, off, s2 offset:4095
 
 // GFX90A: scratch_load_ubyte_d16_hi a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x84,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16_hi a5, off, s2 offset:-4096
 
 // GFX90A: scratch_load_ubyte_d16_hi a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16_hi a5, off, s2 offset:-1 glc
 
 // GFX90A: scratch_load_ubyte_d16_hi a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_ubyte_d16_hi a5, off, s2 offset:-1 slc
 
 // GFX90A: scratch_load_sbyte_d16 a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16 a5, off, s2 offset:-1
 
 // GFX90A: scratch_load_sbyte_d16 a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16 a255, off, s2 offset:-1
 
 // GFX90A: scratch_load_sbyte_d16 a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0xe5,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16 a5, off, s101 offset:-1
 
 // GFX90A: scratch_load_sbyte_d16 a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0xe6,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16 a5, off, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_load_sbyte_d16 a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0xe7,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16 a5, off, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_load_sbyte_d16 a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0xea,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16 a5, off, vcc_lo offset:-1
 
 // GFX90A: scratch_load_sbyte_d16 a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0xeb,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16 a5, off, vcc_hi offset:-1
 
 // GFX90A: scratch_load_sbyte_d16 a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16 a5, v0, off offset:-1
 
 // GFX90A: scratch_load_sbyte_d16 a5, off, s2 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16 a5, off, s2
 
 // GFX90A: scratch_load_sbyte_d16 a5, off, s2 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16 a5, off, s2
 
 // GFX90A: scratch_load_sbyte_d16 a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x88,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16 a5, off, s2 offset:4095
 
 // GFX90A: scratch_load_sbyte_d16 a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x88,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16 a5, off, s2 offset:-4096
 
 // GFX90A: scratch_load_sbyte_d16 a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16 a5, off, s2 offset:-1 glc
 
 // GFX90A: scratch_load_sbyte_d16 a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16 a5, off, s2 offset:-1 slc
 
 // GFX90A: scratch_load_sbyte_d16_hi a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16_hi a5, off, s2 offset:-1
 
 // GFX90A: scratch_load_sbyte_d16_hi a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16_hi a255, off, s2 offset:-1
 
 // GFX90A: scratch_load_sbyte_d16_hi a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0xe5,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16_hi a5, off, s101 offset:-1
 
 // GFX90A: scratch_load_sbyte_d16_hi a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0xe6,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16_hi a5, off, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_load_sbyte_d16_hi a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0xe7,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16_hi a5, off, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_load_sbyte_d16_hi a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0xea,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16_hi a5, off, vcc_lo offset:-1
 
 // GFX90A: scratch_load_sbyte_d16_hi a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0xeb,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16_hi a5, off, vcc_hi offset:-1
 
 // GFX90A: scratch_load_sbyte_d16_hi a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16_hi a5, v0, off offset:-1
 
 // GFX90A: scratch_load_sbyte_d16_hi a5, off, s2 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16_hi a5, off, s2
 
 // GFX90A: scratch_load_sbyte_d16_hi a5, off, s2 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16_hi a5, off, s2
 
 // GFX90A: scratch_load_sbyte_d16_hi a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x8c,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16_hi a5, off, s2 offset:4095
 
 // GFX90A: scratch_load_sbyte_d16_hi a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x8c,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16_hi a5, off, s2 offset:-4096
 
 // GFX90A: scratch_load_sbyte_d16_hi a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16_hi a5, off, s2 offset:-1 glc
 
 // GFX90A: scratch_load_sbyte_d16_hi a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_sbyte_d16_hi a5, off, s2 offset:-1 slc
 
 // GFX90A: scratch_load_short_d16 a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16 a5, off, s2 offset:-1
 
 // GFX90A: scratch_load_short_d16 a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16 a255, off, s2 offset:-1
 
 // GFX90A: scratch_load_short_d16 a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0xe5,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16 a5, off, s101 offset:-1
 
 // GFX90A: scratch_load_short_d16 a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0xe6,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16 a5, off, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_load_short_d16 a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0xe7,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16 a5, off, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_load_short_d16 a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0xea,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16 a5, off, vcc_lo offset:-1
 
 // GFX90A: scratch_load_short_d16 a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0xeb,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16 a5, off, vcc_hi offset:-1
 
 // GFX90A: scratch_load_short_d16 a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16 a5, v0, off offset:-1
 
 // GFX90A: scratch_load_short_d16 a5, off, s2 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16 a5, off, s2
 
 // GFX90A: scratch_load_short_d16 a5, off, s2 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16 a5, off, s2
 
 // GFX90A: scratch_load_short_d16 a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x90,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16 a5, off, s2 offset:4095
 
 // GFX90A: scratch_load_short_d16 a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x90,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16 a5, off, s2 offset:-4096
 
 // GFX90A: scratch_load_short_d16 a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16 a5, off, s2 offset:-1 glc
 
 // GFX90A: scratch_load_short_d16 a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16 a5, off, s2 offset:-1 slc
 
 // GFX90A: scratch_load_short_d16_hi a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16_hi a5, off, s2 offset:-1
 
 // GFX90A: scratch_load_short_d16_hi a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16_hi a255, off, s2 offset:-1
 
 // GFX90A: scratch_load_short_d16_hi a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0xe5,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16_hi a5, off, s101 offset:-1
 
 // GFX90A: scratch_load_short_d16_hi a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0xe6,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16_hi a5, off, flat_scratch_lo offset:-1
 
 // GFX90A: scratch_load_short_d16_hi a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0xe7,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16_hi a5, off, flat_scratch_hi offset:-1
 
 // GFX90A: scratch_load_short_d16_hi a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0xea,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16_hi a5, off, vcc_lo offset:-1
 
 // GFX90A: scratch_load_short_d16_hi a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0xeb,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16_hi a5, off, vcc_hi offset:-1
 
 // GFX90A: scratch_load_short_d16_hi a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0xff,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16_hi a5, v0, off offset:-1
 
 // GFX90A: scratch_load_short_d16_hi a5, off, s2 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16_hi a5, off, s2
 
 // GFX90A: scratch_load_short_d16_hi a5, off, s2 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16_hi a5, off, s2
 
 // GFX90A: scratch_load_short_d16_hi a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x94,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16_hi a5, off, s2 offset:4095
 
 // GFX90A: scratch_load_short_d16_hi a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x94,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16_hi a5, off, s2 offset:-4096
 
 // GFX90A: scratch_load_short_d16_hi a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16_hi a5, off, s2 offset:-1 glc
 
 // GFX90A: scratch_load_short_d16_hi a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 scratch_load_short_d16_hi a5, off, s2 offset:-1 slc
 
 // GFX90A: buffer_load_format_x a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_format_x a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_format_x a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_format_x a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_format_x a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_format_x a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_format_x a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_format_x a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_format_x a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a5, off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_format_x a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a5, off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_format_x a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x00,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_format_x a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x00,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_format_x a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_format_x a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_format_x a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x00,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_format_x a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x00,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_format_x a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x02,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_x a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_format_xy a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_format_xy a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_format_xy a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[6:7], off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[6:7], off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_format_xy a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x04,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_format_xy a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x04,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x04,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x04,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x06,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xy a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[6:8], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_format_xyz a[252:254], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0xfc,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[252:254], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_format_xyz a[6:8], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[6:8], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_format_xyz a[6:8], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[6:8], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[6:8], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[6:8], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[6:8], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[6:8], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[6:8], off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[6:8], off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_format_xyz a[6:8], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x08,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[6:8], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_format_xyz a[6:8], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x08,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[6:8], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], s3 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[6:8], off, s[8:11], s3
 
 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], s3 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[6:8], off, s[8:11], s3
 
 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x08,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[6:8], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x08,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[6:8], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x0a,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyz a[6:8], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[6:9], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_format_xyzw a[252:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0xfc,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[252:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[6:9], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[6:9], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[6:9], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[6:9], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[6:9], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[6:9], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[6:9], off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[6:9], off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_format_xyzw a[6:9], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x0c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[6:9], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_format_xyzw a[6:9], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x0c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[6:9], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], s3 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[6:9], off, s[8:11], s3
 
 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], s3 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[6:9], off, s[8:11], s3
 
 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x0c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[6:9], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x0c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[6:9], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x0e,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_xyzw a[6:9], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_store_format_x a1, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a1, off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_format_x a255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0xff,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a255, off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_format_x a1, off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x84,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a1, off, s[16:19], s4 offset:4095
 
 // GFX90A: buffer_store_format_x a1, off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x98,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a1, off, s[96:99], s4 offset:4095
 
 // GFX90A: buffer_store_format_x a1, off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x83,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a1, off, s[12:15], s101 offset:4095
 
 // GFX90A: buffer_store_format_x a1, off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x83,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a1, off, s[12:15], m0 offset:4095
 
 // GFX90A: buffer_store_format_x a1, off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x83,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a1, off, s[12:15], 0 offset:4095
 
 // GFX90A: buffer_store_format_x a1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x83,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a1, off, s[12:15], -1 offset:4095
 
 // GFX90A: buffer_store_format_x a1, off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x83,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a1, off, s[12:15], 0.5 offset:4095
 
 // GFX90A: buffer_store_format_x a1, off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x83,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a1, off, s[12:15], -4.0 offset:4095
 
 // GFX90A: buffer_store_format_x a1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x10,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a1, v0, s[12:15], s4 idxen offset:4095
 
 // GFX90A: buffer_store_format_x a1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x10,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a1, v0, s[12:15], s4 offen offset:4095
 
 // GFX90A: buffer_store_format_x a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a1, off, s[12:15], s4
 
 // GFX90A: buffer_store_format_x a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a1, off, s[12:15], s4
 
 // GFX90A: buffer_store_format_x a1, off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x10,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a1, off, s[12:15], s4 offset:7
 
 // GFX90A: buffer_store_format_x a1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x10,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a1, off, s[12:15], s4 offset:4095 glc
 
 // GFX90A: buffer_store_format_x a1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x12,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_x a1, off, s[12:15], s4 offset:4095 slc
 
 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[2:3], off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_format_xy a[254:255], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0xfe,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[254:255], off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_format_xy a[2:3], off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x84,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[2:3], off, s[16:19], s4 offset:4095
 
 // GFX90A: buffer_store_format_xy a[2:3], off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x98,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[2:3], off, s[96:99], s4 offset:4095
 
 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x83,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[2:3], off, s[12:15], s101 offset:4095
 
 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x83,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[2:3], off, s[12:15], m0 offset:4095
 
 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x83,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[2:3], off, s[12:15], 0 offset:4095
 
 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x83,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[2:3], off, s[12:15], -1 offset:4095
 
 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x83,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[2:3], off, s[12:15], 0.5 offset:4095
 
 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x83,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[2:3], off, s[12:15], -4.0 offset:4095
 
 // GFX90A: buffer_store_format_xy a[2:3], v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x14,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[2:3], v0, s[12:15], s4 idxen offset:4095
 
 // GFX90A: buffer_store_format_xy a[2:3], v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x14,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[2:3], v0, s[12:15], s4 offen offset:4095
 
 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], s4 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[2:3], off, s[12:15], s4
 
 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], s4 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[2:3], off, s[12:15], s4
 
 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x14,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[2:3], off, s[12:15], s4 offset:7
 
 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x14,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[2:3], off, s[12:15], s4 offset:4095 glc
 
 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x16,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xy a[2:3], off, s[12:15], s4 offset:4095 slc
 
 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[2:4], off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_format_xyz a[252:254], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0xfc,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[252:254], off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_format_xyz a[2:4], off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x84,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[2:4], off, s[16:19], s4 offset:4095
 
 // GFX90A: buffer_store_format_xyz a[2:4], off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x98,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[2:4], off, s[96:99], s4 offset:4095
 
 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x83,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[2:4], off, s[12:15], s101 offset:4095
 
 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x83,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[2:4], off, s[12:15], m0 offset:4095
 
 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x83,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[2:4], off, s[12:15], 0 offset:4095
 
 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x83,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[2:4], off, s[12:15], -1 offset:4095
 
 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x83,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[2:4], off, s[12:15], 0.5 offset:4095
 
 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x83,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[2:4], off, s[12:15], -4.0 offset:4095
 
 // GFX90A: buffer_store_format_xyz a[2:4], v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x18,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[2:4], v0, s[12:15], s4 idxen offset:4095
 
 // GFX90A: buffer_store_format_xyz a[2:4], v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x18,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[2:4], v0, s[12:15], s4 offen offset:4095
 
 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], s4 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[2:4], off, s[12:15], s4
 
 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], s4 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[2:4], off, s[12:15], s4
 
 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x18,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[2:4], off, s[12:15], s4 offset:7
 
 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x18,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[2:4], off, s[12:15], s4 offset:4095 glc
 
 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x1a,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyz a[2:4], off, s[12:15], s4 offset:4095 slc
 
 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[2:5], off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_format_xyzw a[252:255], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0xfc,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[252:255], off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x84,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[2:5], off, s[16:19], s4 offset:4095
 
 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x98,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[2:5], off, s[96:99], s4 offset:4095
 
 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x83,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[2:5], off, s[12:15], s101 offset:4095
 
 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x83,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[2:5], off, s[12:15], m0 offset:4095
 
 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x83,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[2:5], off, s[12:15], 0 offset:4095
 
 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x83,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[2:5], off, s[12:15], -1 offset:4095
 
 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x83,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[2:5], off, s[12:15], 0.5 offset:4095
 
 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x83,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[2:5], off, s[12:15], -4.0 offset:4095
 
 // GFX90A: buffer_store_format_xyzw a[2:5], v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x1c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[2:5], v0, s[12:15], s4 idxen offset:4095
 
 // GFX90A: buffer_store_format_xyzw a[2:5], v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x1c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[2:5], v0, s[12:15], s4 offen offset:4095
 
 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], s4 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[2:5], off, s[12:15], s4
 
 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], s4 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[2:5], off, s[12:15], s4
 
 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x1c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[2:5], off, s[12:15], s4 offset:7
 
 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x1c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[2:5], off, s[12:15], s4 offset:4095 glc
 
 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x1e,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_xyzw a[2:5], off, s[12:15], s4 offset:4095 slc
 
 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_format_d16_x a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_format_d16_x a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_format_d16_x a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a5, off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a5, off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_format_d16_x a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x20,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_format_d16_x a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x20,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x20,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x20,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x22,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_x a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_format_d16_xy a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_format_d16_xy a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_format_d16_xy a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a5, off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a5, off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_format_d16_xy a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x24,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_format_d16_xy a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x24,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x24,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x24,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x26,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xy a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyz a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[6:7], off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[6:7], off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyz a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x28,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_format_d16_xyz a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x28,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x28,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x28,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x2a,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyzw a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[6:7], off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[6:7], off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_format_d16_xyzw a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x2c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_format_d16_xyzw a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x2c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x2c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x2c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x2e,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a1, off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_format_d16_x a255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0xff,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a255, off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_format_d16_x a1, off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x84,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a1, off, s[16:19], s4 offset:4095
 
 // GFX90A: buffer_store_format_d16_x a1, off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x98,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a1, off, s[96:99], s4 offset:4095
 
 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x83,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a1, off, s[12:15], s101 offset:4095
 
 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x83,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a1, off, s[12:15], m0 offset:4095
 
 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x83,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a1, off, s[12:15], 0 offset:4095
 
 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x83,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a1, off, s[12:15], -1 offset:4095
 
 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x83,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a1, off, s[12:15], 0.5 offset:4095
 
 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x83,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a1, off, s[12:15], -4.0 offset:4095
 
 // GFX90A: buffer_store_format_d16_x a1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x30,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a1, v0, s[12:15], s4 idxen offset:4095
 
 // GFX90A: buffer_store_format_d16_x a1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x30,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a1, v0, s[12:15], s4 offen offset:4095
 
 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a1, off, s[12:15], s4
 
 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a1, off, s[12:15], s4
 
 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x30,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a1, off, s[12:15], s4 offset:7
 
 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x30,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a1, off, s[12:15], s4 offset:4095 glc
 
 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x32,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_x a1, off, s[12:15], s4 offset:4095 slc
 
 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a1, off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_format_d16_xy a255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0xff,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a255, off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_format_d16_xy a1, off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x84,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a1, off, s[16:19], s4 offset:4095
 
 // GFX90A: buffer_store_format_d16_xy a1, off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x98,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a1, off, s[96:99], s4 offset:4095
 
 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x83,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a1, off, s[12:15], s101 offset:4095
 
 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x83,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a1, off, s[12:15], m0 offset:4095
 
 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x83,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a1, off, s[12:15], 0 offset:4095
 
 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x83,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a1, off, s[12:15], -1 offset:4095
 
 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x83,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a1, off, s[12:15], 0.5 offset:4095
 
 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x83,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a1, off, s[12:15], -4.0 offset:4095
 
 // GFX90A: buffer_store_format_d16_xy a1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x34,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a1, v0, s[12:15], s4 idxen offset:4095
 
 // GFX90A: buffer_store_format_d16_xy a1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x34,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a1, v0, s[12:15], s4 offen offset:4095
 
 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a1, off, s[12:15], s4
 
 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a1, off, s[12:15], s4
 
 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x34,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a1, off, s[12:15], s4 offset:7
 
 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x34,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a1, off, s[12:15], s4 offset:4095 glc
 
 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x36,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xy a1, off, s[12:15], s4 offset:4095 slc
 
 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyz a[254:255], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0xfe,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[254:255], off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x84,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[2:3], off, s[16:19], s4 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x98,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[2:3], off, s[96:99], s4 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x83,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[2:3], off, s[12:15], s101 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x83,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[2:3], off, s[12:15], m0 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x83,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[2:3], off, s[12:15], 0 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x83,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[2:3], off, s[12:15], -1 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x83,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[2:3], off, s[12:15], 0.5 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x83,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[2:3], off, s[12:15], -4.0 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyz a[2:3], v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x38,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[2:3], v0, s[12:15], s4 idxen offset:4095
 
 // GFX90A: buffer_store_format_d16_xyz a[2:3], v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x38,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[2:3], v0, s[12:15], s4 offen offset:4095
 
 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4
 
 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4
 
 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x38,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 offset:7
 
 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x38,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 offset:4095 glc
 
 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x3a,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 offset:4095 slc
 
 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyzw a[254:255], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0xfe,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[254:255], off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x84,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[2:3], off, s[16:19], s4 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x98,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[2:3], off, s[96:99], s4 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x83,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s101 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x83,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[2:3], off, s[12:15], m0 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x83,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[2:3], off, s[12:15], 0 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x83,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[2:3], off, s[12:15], -1 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x83,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[2:3], off, s[12:15], 0.5 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x83,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[2:3], off, s[12:15], -4.0 offset:4095
 
 // GFX90A: buffer_store_format_d16_xyzw a[2:3], v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x3c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[2:3], v0, s[12:15], s4 idxen offset:4095
 
 // GFX90A: buffer_store_format_d16_xyzw a[2:3], v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x3c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[2:3], v0, s[12:15], s4 offen offset:4095
 
 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 ; encoding: [0x00,0x00,0x3c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4
 
 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 ; encoding: [0x00,0x00,0x3c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4
 
 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x3c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 offset:7
 
 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x3c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 offset:4095 glc
 
 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x3e,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 offset:4095 slc
 
 // GFX90A: buffer_load_ubyte a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_ubyte a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_ubyte a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_ubyte a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_ubyte a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_ubyte a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_ubyte a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_ubyte a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_ubyte a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a5, off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_ubyte a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a5, off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_ubyte a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x40,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_ubyte a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x40,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_ubyte a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_ubyte a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_ubyte a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x40,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_ubyte a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x40,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_ubyte a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x42,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_sbyte a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_sbyte a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_sbyte a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_sbyte a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_sbyte a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_sbyte a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_sbyte a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_sbyte a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_sbyte a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a5, off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_sbyte a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a5, off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_sbyte a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x44,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_sbyte a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x44,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_sbyte a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x44,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_sbyte a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x44,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_sbyte a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x44,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_sbyte a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x44,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_sbyte a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x46,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_ushort a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_ushort a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_ushort a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_ushort a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_ushort a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_ushort a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_ushort a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_ushort a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_ushort a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a5, off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_ushort a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a5, off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_ushort a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x48,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_ushort a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x48,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_ushort a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_ushort a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_ushort a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x48,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_ushort a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x48,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_ushort a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x4a,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ushort a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_sshort a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_sshort a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_sshort a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_sshort a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_sshort a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_sshort a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_sshort a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_sshort a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_sshort a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a5, off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_sshort a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a5, off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_sshort a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x4c,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_sshort a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x4c,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_sshort a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x4c,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_sshort a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x4c,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_sshort a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x4c,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_sshort a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x4c,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_sshort a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x4e,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sshort a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_dword a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_dword a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_dword a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_dword a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_dword a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_dword a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_dword a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_dword a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_dword a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a5, off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_dword a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a5, off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_dword a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x50,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_dword a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x50,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_dword a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_dword a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_dword a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x50,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_dword a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x50,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_dword a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x52,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dword a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_dwordx2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[6:7], off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[6:7], off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_dwordx2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x54,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_dwordx2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x54,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x54,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x54,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x54,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x54,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x56,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx2 a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[6:8], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_dwordx3 a[252:254], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0xfc,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[252:254], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[6:8], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[6:8], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[6:8], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[6:8], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[6:8], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[6:8], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[6:8], off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[6:8], off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_dwordx3 a[6:8], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x58,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[6:8], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_dwordx3 a[6:8], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x58,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[6:8], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], s3 ; encoding: [0x00,0x00,0x58,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[6:8], off, s[8:11], s3
 
 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], s3 ; encoding: [0x00,0x00,0x58,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[6:8], off, s[8:11], s3
 
 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x58,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[6:8], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x58,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[6:8], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x5a,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx3 a[6:8], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[6:9], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_dwordx4 a[252:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0xfc,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[252:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[6:9], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[6:9], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[6:9], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[6:9], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[6:9], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[6:9], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[6:9], off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[6:9], off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_dwordx4 a[6:9], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x5c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[6:9], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_dwordx4 a[6:9], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x5c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[6:9], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], s3 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[6:9], off, s[8:11], s3
 
 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], s3 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[6:9], off, s[8:11], s3
 
 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x5c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[6:9], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x5c,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[6:9], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x5e,0xe0,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_dwordx4 a[6:9], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_store_byte a1, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a1, off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_byte a255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0xff,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a255, off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_byte a1, off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x84,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a1, off, s[16:19], s4 offset:4095
 
 // GFX90A: buffer_store_byte a1, off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x98,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a1, off, s[96:99], s4 offset:4095
 
 // GFX90A: buffer_store_byte a1, off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x83,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a1, off, s[12:15], s101 offset:4095
 
 // GFX90A: buffer_store_byte a1, off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x83,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a1, off, s[12:15], m0 offset:4095
 
 // GFX90A: buffer_store_byte a1, off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x83,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a1, off, s[12:15], 0 offset:4095
 
 // GFX90A: buffer_store_byte a1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x83,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a1, off, s[12:15], -1 offset:4095
 
 // GFX90A: buffer_store_byte a1, off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x83,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a1, off, s[12:15], 0.5 offset:4095
 
 // GFX90A: buffer_store_byte a1, off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x83,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a1, off, s[12:15], -4.0 offset:4095
 
 // GFX90A: buffer_store_byte a1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x60,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a1, v0, s[12:15], s4 idxen offset:4095
 
 // GFX90A: buffer_store_byte a1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x60,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a1, v0, s[12:15], s4 offen offset:4095
 
 // GFX90A: buffer_store_byte a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a1, off, s[12:15], s4
 
 // GFX90A: buffer_store_byte a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a1, off, s[12:15], s4
 
 // GFX90A: buffer_store_byte a1, off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x60,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a1, off, s[12:15], s4 offset:7
 
 // GFX90A: buffer_store_byte a1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x60,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a1, off, s[12:15], s4 offset:4095 glc
 
 // GFX90A: buffer_store_byte a1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x62,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte a1, off, s[12:15], s4 offset:4095 slc
 
 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a1, off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_byte_d16_hi a255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0xff,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a255, off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_byte_d16_hi a1, off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x84,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a1, off, s[16:19], s4 offset:4095
 
 // GFX90A: buffer_store_byte_d16_hi a1, off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x98,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a1, off, s[96:99], s4 offset:4095
 
 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x83,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a1, off, s[12:15], s101 offset:4095
 
 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x83,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a1, off, s[12:15], m0 offset:4095
 
 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x83,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a1, off, s[12:15], 0 offset:4095
 
 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x83,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a1, off, s[12:15], -1 offset:4095
 
 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x83,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a1, off, s[12:15], 0.5 offset:4095
 
 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x83,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a1, off, s[12:15], -4.0 offset:4095
 
 // GFX90A: buffer_store_byte_d16_hi a1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x64,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a1, v0, s[12:15], s4 idxen offset:4095
 
 // GFX90A: buffer_store_byte_d16_hi a1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x64,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a1, v0, s[12:15], s4 offen offset:4095
 
 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a1, off, s[12:15], s4
 
 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a1, off, s[12:15], s4
 
 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x64,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a1, off, s[12:15], s4 offset:7
 
 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x64,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a1, off, s[12:15], s4 offset:4095 glc
 
 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x66,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_byte_d16_hi a1, off, s[12:15], s4 offset:4095 slc
 
 // GFX90A: buffer_store_short a1, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a1, off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_short a255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0xff,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a255, off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_short a1, off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x84,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a1, off, s[16:19], s4 offset:4095
 
 // GFX90A: buffer_store_short a1, off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x98,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a1, off, s[96:99], s4 offset:4095
 
 // GFX90A: buffer_store_short a1, off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x83,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a1, off, s[12:15], s101 offset:4095
 
 // GFX90A: buffer_store_short a1, off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x83,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a1, off, s[12:15], m0 offset:4095
 
 // GFX90A: buffer_store_short a1, off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x83,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a1, off, s[12:15], 0 offset:4095
 
 // GFX90A: buffer_store_short a1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x83,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a1, off, s[12:15], -1 offset:4095
 
 // GFX90A: buffer_store_short a1, off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x83,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a1, off, s[12:15], 0.5 offset:4095
 
 // GFX90A: buffer_store_short a1, off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x83,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a1, off, s[12:15], -4.0 offset:4095
 
 // GFX90A: buffer_store_short a1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x68,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a1, v0, s[12:15], s4 idxen offset:4095
 
 // GFX90A: buffer_store_short a1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x68,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a1, v0, s[12:15], s4 offen offset:4095
 
 // GFX90A: buffer_store_short a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a1, off, s[12:15], s4
 
 // GFX90A: buffer_store_short a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a1, off, s[12:15], s4
 
 // GFX90A: buffer_store_short a1, off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x68,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a1, off, s[12:15], s4 offset:7
 
 // GFX90A: buffer_store_short a1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x68,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a1, off, s[12:15], s4 offset:4095 glc
 
 // GFX90A: buffer_store_short a1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x6a,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short a1, off, s[12:15], s4 offset:4095 slc
 
 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a1, off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_short_d16_hi a255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0xff,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a255, off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_short_d16_hi a1, off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x84,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a1, off, s[16:19], s4 offset:4095
 
 // GFX90A: buffer_store_short_d16_hi a1, off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x98,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a1, off, s[96:99], s4 offset:4095
 
 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x83,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a1, off, s[12:15], s101 offset:4095
 
 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x83,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a1, off, s[12:15], m0 offset:4095
 
 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x83,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a1, off, s[12:15], 0 offset:4095
 
 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x83,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a1, off, s[12:15], -1 offset:4095
 
 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x83,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a1, off, s[12:15], 0.5 offset:4095
 
 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x83,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a1, off, s[12:15], -4.0 offset:4095
 
 // GFX90A: buffer_store_short_d16_hi a1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x6c,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a1, v0, s[12:15], s4 idxen offset:4095
 
 // GFX90A: buffer_store_short_d16_hi a1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x6c,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a1, v0, s[12:15], s4 offen offset:4095
 
 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x6c,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a1, off, s[12:15], s4
 
 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x6c,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a1, off, s[12:15], s4
 
 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x6c,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a1, off, s[12:15], s4 offset:7
 
 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x6c,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a1, off, s[12:15], s4 offset:4095 glc
 
 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x6e,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_short_d16_hi a1, off, s[12:15], s4 offset:4095 slc
 
 // GFX90A: buffer_store_dword a1, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a1, off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_dword a255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0xff,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a255, off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_dword a1, off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x84,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a1, off, s[16:19], s4 offset:4095
 
 // GFX90A: buffer_store_dword a1, off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x98,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a1, off, s[96:99], s4 offset:4095
 
 // GFX90A: buffer_store_dword a1, off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x83,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a1, off, s[12:15], s101 offset:4095
 
 // GFX90A: buffer_store_dword a1, off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x83,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a1, off, s[12:15], m0 offset:4095
 
 // GFX90A: buffer_store_dword a1, off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x83,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a1, off, s[12:15], 0 offset:4095
 
 // GFX90A: buffer_store_dword a1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x83,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a1, off, s[12:15], -1 offset:4095
 
 // GFX90A: buffer_store_dword a1, off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x83,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a1, off, s[12:15], 0.5 offset:4095
 
 // GFX90A: buffer_store_dword a1, off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x83,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a1, off, s[12:15], -4.0 offset:4095
 
 // GFX90A: buffer_store_dword a1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x70,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a1, v0, s[12:15], s4 idxen offset:4095
 
 // GFX90A: buffer_store_dword a1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x70,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a1, v0, s[12:15], s4 offen offset:4095
 
 // GFX90A: buffer_store_dword a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a1, off, s[12:15], s4
 
 // GFX90A: buffer_store_dword a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a1, off, s[12:15], s4
 
 // GFX90A: buffer_store_dword a1, off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x70,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a1, off, s[12:15], s4 offset:7
 
 // GFX90A: buffer_store_dword a1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x70,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a1, off, s[12:15], s4 offset:4095 glc
 
 // GFX90A: buffer_store_dword a1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x72,0xe0,0x00,0x01,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dword a1, off, s[12:15], s4 offset:4095 slc
 
 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[2:3], off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_dwordx2 a[254:255], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0xfe,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[254:255], off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x84,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[2:3], off, s[16:19], s4 offset:4095
 
 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x98,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[2:3], off, s[96:99], s4 offset:4095
 
 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x83,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[2:3], off, s[12:15], s101 offset:4095
 
 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x83,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[2:3], off, s[12:15], m0 offset:4095
 
 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x83,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[2:3], off, s[12:15], 0 offset:4095
 
 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x83,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[2:3], off, s[12:15], -1 offset:4095
 
 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x83,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[2:3], off, s[12:15], 0.5 offset:4095
 
 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x83,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[2:3], off, s[12:15], -4.0 offset:4095
 
 // GFX90A: buffer_store_dwordx2 a[2:3], v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x74,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[2:3], v0, s[12:15], s4 idxen offset:4095
 
 // GFX90A: buffer_store_dwordx2 a[2:3], v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x74,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[2:3], v0, s[12:15], s4 offen offset:4095
 
 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], s4 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[2:3], off, s[12:15], s4
 
 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], s4 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[2:3], off, s[12:15], s4
 
 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x74,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[2:3], off, s[12:15], s4 offset:7
 
 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x74,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[2:3], off, s[12:15], s4 offset:4095 glc
 
 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x76,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx2 a[2:3], off, s[12:15], s4 offset:4095 slc
 
 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[2:4], off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_dwordx3 a[252:254], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0xfc,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[252:254], off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x84,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[2:4], off, s[16:19], s4 offset:4095
 
 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x98,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[2:4], off, s[96:99], s4 offset:4095
 
 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x83,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[2:4], off, s[12:15], s101 offset:4095
 
 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x83,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[2:4], off, s[12:15], m0 offset:4095
 
 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x83,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[2:4], off, s[12:15], 0 offset:4095
 
 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x83,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[2:4], off, s[12:15], -1 offset:4095
 
 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x83,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[2:4], off, s[12:15], 0.5 offset:4095
 
 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x83,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[2:4], off, s[12:15], -4.0 offset:4095
 
 // GFX90A: buffer_store_dwordx3 a[2:4], v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x78,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[2:4], v0, s[12:15], s4 idxen offset:4095
 
 // GFX90A: buffer_store_dwordx3 a[2:4], v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x78,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[2:4], v0, s[12:15], s4 offen offset:4095
 
 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], s4 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[2:4], off, s[12:15], s4
 
 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], s4 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[2:4], off, s[12:15], s4
 
 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x78,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[2:4], off, s[12:15], s4 offset:7
 
 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x78,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[2:4], off, s[12:15], s4 offset:4095 glc
 
 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x7a,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx3 a[2:4], off, s[12:15], s4 offset:4095 slc
 
 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[2:5], off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_dwordx4 a[252:255], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0xfc,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[252:255], off, s[12:15], s4 offset:4095
 
 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x84,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[2:5], off, s[16:19], s4 offset:4095
 
 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x98,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[2:5], off, s[96:99], s4 offset:4095
 
 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x83,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[2:5], off, s[12:15], s101 offset:4095
 
 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x83,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[2:5], off, s[12:15], m0 offset:4095
 
 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x83,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[2:5], off, s[12:15], 0 offset:4095
 
 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x83,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[2:5], off, s[12:15], -1 offset:4095
 
 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x83,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[2:5], off, s[12:15], 0.5 offset:4095
 
 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x83,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[2:5], off, s[12:15], -4.0 offset:4095
 
 // GFX90A: buffer_store_dwordx4 a[2:5], v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x7c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[2:5], v0, s[12:15], s4 idxen offset:4095
 
 // GFX90A: buffer_store_dwordx4 a[2:5], v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x7c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[2:5], v0, s[12:15], s4 offen offset:4095
 
 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], s4 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[2:5], off, s[12:15], s4
 
 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], s4 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[2:5], off, s[12:15], s4
 
 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x7c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[2:5], off, s[12:15], s4 offset:7
 
 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x7c,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[2:5], off, s[12:15], s4 offset:4095 glc
 
 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x7e,0xe0,0x00,0x02,0x83,0x04]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_store_dwordx4 a[2:5], off, s[12:15], s4 offset:4095 slc
 
 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16 a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16 a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16 a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a5, off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a5, off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16 a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x80,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_ubyte_d16 a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x80,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x80,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x80,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x80,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x80,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x82,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16 a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16_hi a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a5, off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a5, off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_ubyte_d16_hi a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x84,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_ubyte_d16_hi a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x84,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x84,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x84,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x84,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x84,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x86,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16 a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16 a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16 a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a5, off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a5, off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16 a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x88,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_sbyte_d16 a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x88,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x88,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x88,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x88,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x88,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x8a,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16 a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16_hi a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a5, off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a5, off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_sbyte_d16_hi a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x8c,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_sbyte_d16_hi a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x8c,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x8c,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x8c,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x8c,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x8c,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x8e,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_short_d16 a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_short_d16 a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_short_d16 a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a5, off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a5, off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_short_d16 a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x90,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_short_d16 a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x90,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x90,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x90,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x90,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x90,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x92,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16 a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_short_d16_hi a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_load_short_d16_hi a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_load_short_d16_hi a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x82,0xf0]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a5, off, s[8:11], 0.5 offset:4095
 
 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x82,0xf7]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a5, off, s[8:11], -4.0 offset:4095
 
 // GFX90A: buffer_load_short_d16_hi a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x94,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_load_short_d16_hi a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x94,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x94,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x94,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a5, off, s[8:11], s3
 
 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x94,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x94,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x96,0xe0,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_load_short_d16_hi a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_swap a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_swap a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_swap a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_swap a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_swap a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_swap a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_swap a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_swap a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_swap a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x00,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_swap a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x00,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_swap a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x00,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_swap a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x00,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_swap a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x00,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_swap a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x00,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_swap a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x02,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_cmpswap a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe1,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe1,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe1,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe1,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe1,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe1,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe1,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_cmpswap a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x04,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_cmpswap a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x04,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x04,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x04,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x04,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x04,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x06,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_add a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_add a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_add a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_add a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_add a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_add a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_add a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_add a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_add a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x08,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_add a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x08,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_add a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x08,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_add a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x08,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_add a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x08,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_add a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x08,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_add a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x0a,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_sub a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_sub a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe1,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_sub a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe1,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_sub a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe1,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_sub a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe1,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_sub a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe1,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_sub a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe1,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_sub a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe1,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_sub a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x0c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_sub a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x0c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_sub a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x0c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_sub a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x0c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_sub a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x0c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_sub a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x0c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_sub a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x0e,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_smin a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_smin a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe1,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_smin a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe1,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_smin a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe1,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_smin a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe1,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_smin a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe1,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_smin a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe1,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_smin a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe1,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_smin a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x10,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_smin a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x10,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_smin a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x10,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_smin a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x10,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_smin a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x10,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_smin a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x10,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_smin a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x12,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_umin a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_umin a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe1,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_umin a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe1,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_umin a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe1,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_umin a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe1,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_umin a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe1,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_umin a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe1,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_umin a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe1,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_umin a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x14,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_umin a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x14,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_umin a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x14,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_umin a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x14,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_umin a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x14,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_umin a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x14,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_umin a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x16,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_smax a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_smax a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe1,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_smax a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe1,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_smax a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe1,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_smax a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe1,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_smax a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe1,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_smax a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe1,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_smax a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe1,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_smax a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x18,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_smax a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x18,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_smax a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x18,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_smax a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x18,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_smax a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x18,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_smax a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x18,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_smax a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x1a,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_umax a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_umax a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe1,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_umax a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe1,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_umax a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe1,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_umax a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe1,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_umax a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe1,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_umax a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe1,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_umax a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe1,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_umax a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x1c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_umax a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x1c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_umax a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x1c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_umax a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x1c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_umax a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x1c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_umax a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x1c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_umax a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x1e,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_and a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_and a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe1,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_and a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe1,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_and a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe1,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_and a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe1,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_and a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe1,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_and a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe1,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_and a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe1,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_and a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x20,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_and a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x20,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_and a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x20,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_and a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x20,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_and a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x20,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_and a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x20,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_and a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x22,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_or a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_or a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe1,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_or a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe1,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_or a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe1,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_or a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe1,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_or a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe1,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_or a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe1,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_or a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe1,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_or a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x24,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_or a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x24,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_or a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x24,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_or a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x24,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_or a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x24,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_or a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x24,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_or a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x26,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_xor a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_xor a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe1,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_xor a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe1,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_xor a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe1,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_xor a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe1,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_xor a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe1,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_xor a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe1,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_xor a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe1,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_xor a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x28,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_xor a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x28,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_xor a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x28,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_xor a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x28,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_xor a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x28,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_xor a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x28,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_xor a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x2a,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_inc a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_inc a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe1,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_inc a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe1,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_inc a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe1,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_inc a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe1,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_inc a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe1,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_inc a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe1,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_inc a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe1,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_inc a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x2c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_inc a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x2c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_inc a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x2c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_inc a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x2c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_inc a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x2c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_inc a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x2c,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_inc a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x2e,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_dec a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec a5, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_dec a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe1,0x00,0xff,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec a255, off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_dec a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe1,0x00,0x05,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec a5, off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_dec a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe1,0x00,0x05,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec a5, off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_dec a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe1,0x00,0x05,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec a5, off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_dec a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe1,0x00,0x05,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec a5, off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_dec a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe1,0x00,0x05,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec a5, off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_dec a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe1,0x00,0x05,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec a5, off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_dec a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x30,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec a5, v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_dec a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x30,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec a5, v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_dec a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x30,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_dec a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x30,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec a5, off, s[8:11], s3
 
 // GFX90A: buffer_atomic_dec a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x30,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec a5, off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_dec a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x30,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec a5, off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_dec a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x32,0xe1,0x00,0x05,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec a5, off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_swap_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap_x2 a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap_x2 a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap_x2 a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap_x2 a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap_x2 a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap_x2 a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap_x2 a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_swap_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x80,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_swap_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x80,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap_x2 a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x80,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x80,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x80,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x80,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x82,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_cmpswap_x2 a[252:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe1,0x00,0xfc,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap_x2 a[252:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe1,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap_x2 a[6:9], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe1,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap_x2 a[6:9], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe1,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe1,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe1,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe1,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x84,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap_x2 a[6:9], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x84,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap_x2 a[6:9], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 ; encoding: [0x00,0x00,0x84,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 ; encoding: [0x00,0x00,0x84,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x84,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x84,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x86,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_add_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe1,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add_x2 a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe1,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add_x2 a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe1,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add_x2 a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe1,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add_x2 a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe1,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add_x2 a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe1,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add_x2 a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe1,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add_x2 a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_add_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x88,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_add_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x88,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add_x2 a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x88,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x88,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x88,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x88,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x8a,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_sub_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe1,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub_x2 a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe1,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub_x2 a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe1,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub_x2 a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe1,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub_x2 a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe1,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub_x2 a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe1,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub_x2 a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe1,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub_x2 a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_sub_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x8c,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_sub_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x8c,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub_x2 a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x8c,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x8c,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x8c,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x8c,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x8e,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_smin_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe1,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin_x2 a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe1,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin_x2 a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe1,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin_x2 a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe1,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin_x2 a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe1,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin_x2 a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe1,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin_x2 a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe1,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin_x2 a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_smin_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x90,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_smin_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x90,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin_x2 a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x90,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x90,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x90,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x90,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x92,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_umin_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe1,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin_x2 a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe1,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin_x2 a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe1,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin_x2 a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe1,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin_x2 a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe1,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin_x2 a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe1,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin_x2 a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe1,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin_x2 a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_umin_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x94,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_umin_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x94,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin_x2 a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x94,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x94,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x94,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x94,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x96,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_smax_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe1,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax_x2 a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe1,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax_x2 a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe1,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax_x2 a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe1,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax_x2 a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe1,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax_x2 a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe1,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax_x2 a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe1,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax_x2 a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_smax_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x98,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_smax_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x98,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax_x2 a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x98,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x98,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x98,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x98,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x9a,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_umax_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe1,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax_x2 a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe1,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax_x2 a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe1,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax_x2 a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe1,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax_x2 a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe1,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax_x2 a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe1,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax_x2 a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe1,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax_x2 a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_umax_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x9c,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_umax_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x9c,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax_x2 a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x9c,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x9c,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x9c,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x9c,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x9e,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xa0,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_and_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xa0,0xe1,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and_x2 a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0xa0,0xe1,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and_x2 a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0xa0,0xe1,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and_x2 a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0xa0,0xe1,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and_x2 a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0xa0,0xe1,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and_x2 a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0xa0,0xe1,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and_x2 a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0xa0,0xe1,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and_x2 a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_and_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0xa0,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_and_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0xa0,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and_x2 a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xa0,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xa0,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0xa0,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0xa0,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0xa2,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xa4,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_or_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xa4,0xe1,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or_x2 a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0xa4,0xe1,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or_x2 a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0xa4,0xe1,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or_x2 a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0xa4,0xe1,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or_x2 a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0xa4,0xe1,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or_x2 a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0xa4,0xe1,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or_x2 a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0xa4,0xe1,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or_x2 a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_or_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0xa4,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_or_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0xa4,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or_x2 a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xa4,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xa4,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0xa4,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0xa4,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0xa6,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xa8,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_xor_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xa8,0xe1,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor_x2 a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0xa8,0xe1,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor_x2 a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0xa8,0xe1,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor_x2 a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0xa8,0xe1,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor_x2 a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0xa8,0xe1,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor_x2 a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0xa8,0xe1,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor_x2 a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0xa8,0xe1,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor_x2 a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_xor_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0xa8,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_xor_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0xa8,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor_x2 a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xa8,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xa8,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0xa8,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0xa8,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0xaa,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xac,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_inc_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xac,0xe1,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc_x2 a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0xac,0xe1,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc_x2 a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0xac,0xe1,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc_x2 a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0xac,0xe1,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc_x2 a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0xac,0xe1,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc_x2 a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0xac,0xe1,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc_x2 a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0xac,0xe1,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc_x2 a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_inc_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0xac,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_inc_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0xac,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc_x2 a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xac,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xac,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0xac,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0xac,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0xae,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xb0,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_dec_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xb0,0xe1,0x00,0xfe,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec_x2 a[254:255], off, s[8:11], s3 offset:4095
 
 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0xb0,0xe1,0x00,0x06,0x83,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec_x2 a[6:7], off, s[12:15], s3 offset:4095
 
 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0xb0,0xe1,0x00,0x06,0x98,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec_x2 a[6:7], off, s[96:99], s3 offset:4095
 
 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0xb0,0xe1,0x00,0x06,0x82,0x65]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec_x2 a[6:7], off, s[8:11], s101 offset:4095
 
 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0xb0,0xe1,0x00,0x06,0x82,0x7c]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec_x2 a[6:7], off, s[8:11], m0 offset:4095
 
 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0xb0,0xe1,0x00,0x06,0x82,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec_x2 a[6:7], off, s[8:11], 0 offset:4095
 
 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0xb0,0xe1,0x00,0x06,0x82,0xc1]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec_x2 a[6:7], off, s[8:11], -1 offset:4095
 
 // GFX90A: buffer_atomic_dec_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0xb0,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095
 
 // GFX90A: buffer_atomic_dec_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0xb0,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec_x2 a[6:7], v0, s[8:11], s3 offen offset:4095
 
 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xb0,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xb0,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3
 
 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0xb0,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 offset:7
 
 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0xb0,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 offset:4095 glc
 
 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0xb2,0xe1,0x00,0x06,0x82,0x03]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 offset:4095 slc
 
 // GFX90A: tbuffer_load_format_x a1, off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x00,0x78,0xe9,0x00,0x01,0x81,0x01]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 tbuffer_load_format_x a1, off, s[4:7],  dfmt:15, nfmt:2, s1
 
 // GFX90A: tbuffer_load_format_xy a[2:3], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x78,0xe9,0x00,0x02,0x81,0x01]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 tbuffer_load_format_xy a[2:3], off, s[4:7],  dfmt:15, nfmt:2, s1
 
 // GFX90A: tbuffer_load_format_xyz a[2:4], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x00,0x79,0xe9,0x00,0x02,0x81,0x01]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 tbuffer_load_format_xyz a[2:4], off, s[4:7],  dfmt:15, nfmt:2, s1
 
 // GFX90A: tbuffer_load_format_xyzw a[2:5], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x79,0xe9,0x00,0x02,0x81,0x01]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 tbuffer_load_format_xyzw a[2:5], off, s[4:7],  dfmt:15, nfmt:2, s1
 
 // GFX90A: tbuffer_store_format_x a1, off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x00,0x7a,0xe9,0x00,0x01,0x81,0x01]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 tbuffer_store_format_x a1, off, s[4:7],  dfmt:15, nfmt:2, s1
 
 // GFX90A: tbuffer_store_format_xy a[2:3], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x7a,0xe9,0x00,0x02,0x81,0x01]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 tbuffer_store_format_xy a[2:3], off, s[4:7],  dfmt:15, nfmt:2, s1
 
 // GFX90A: tbuffer_store_format_xyzw a[2:5], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x7b,0xe9,0x00,0x02,0x81,0x01]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 tbuffer_store_format_xyzw a[2:5], off, s[4:7],  dfmt:15, nfmt:2, s1
 
 // GFX90A: tbuffer_store_format_xyzw a[2:5], off, ttmp[4:7], ttmp1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x7b,0xe9,0x00,0x02,0x9c,0x6d]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 tbuffer_store_format_xyzw a[2:5], off, ttmp[4:7],  dfmt:15, nfmt:2, ttmp1
 
 // GFX90A: tbuffer_store_format_xyzw a[2:5], off, ttmp[4:7], ttmp1 format:[BUF_DATA_FORMAT_RESERVED_15] ; encoding: [0x00,0x80,0x7b,0xe8,0x00,0x02,0x9c,0x6d]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 tbuffer_store_format_xyzw a[2:5], off, ttmp[4:7],  dfmt:15, nfmt:0, ttmp1
 
 // GFX90A: tbuffer_store_format_xyzw a[2:5], off, ttmp[4:7], ttmp1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x03,0xe9,0x00,0x02,0x9c,0x6d]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 tbuffer_store_format_xyzw a[2:5], off, ttmp[4:7],  dfmt:0, nfmt:2, ttmp1
 
 // GFX90A: tbuffer_store_format_xyzw a[2:5], off, ttmp[4:7], ttmp1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x7b,0xe9,0x00,0x02,0x9c,0x6d]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 tbuffer_store_format_xyzw a[2:5], off, ttmp[4:7],  dfmt:15, nfmt:2, ttmp1
 
 // GFX90A: ds_add_u32 v1, a2 offset:65535  ; encoding: [0xff,0xff,0x00,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_u32 v1, a2 offset:65535
 
 // GFX90A: ds_add_u32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x00,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_u32 v255, a2 offset:65535
 
 // GFX90A: ds_add_u32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x00,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_u32 v1, a255 offset:65535
 
 // GFX90A: ds_add_u32 v1, a2               ; encoding: [0x00,0x00,0x00,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_u32 v1, a2
 
 // GFX90A: ds_add_u32 v1, a2               ; encoding: [0x00,0x00,0x00,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_u32 v1, a2
 
 // GFX90A: ds_add_u32 v1, a2 offset:4      ; encoding: [0x04,0x00,0x00,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_u32 v1, a2 offset:4
 
 // GFX90A: ds_sub_u32 v1, a2 offset:65535  ; encoding: [0xff,0xff,0x02,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_sub_u32 v1, a2 offset:65535
 
 // GFX90A: ds_sub_u32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x02,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_sub_u32 v255, a2 offset:65535
 
 // GFX90A: ds_sub_u32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x02,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_sub_u32 v1, a255 offset:65535
 
 // GFX90A: ds_sub_u32 v1, a2               ; encoding: [0x00,0x00,0x02,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_sub_u32 v1, a2
 
 // GFX90A: ds_sub_u32 v1, a2               ; encoding: [0x00,0x00,0x02,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_sub_u32 v1, a2
 
 // GFX90A: ds_sub_u32 v1, a2 offset:4      ; encoding: [0x04,0x00,0x02,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_sub_u32 v1, a2 offset:4
 
 // GFX90A: ds_rsub_u32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x04,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_rsub_u32 v1, a2 offset:65535
 
 // GFX90A: ds_rsub_u32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x04,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_rsub_u32 v255, a2 offset:65535
 
 // GFX90A: ds_rsub_u32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x04,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_rsub_u32 v1, a255 offset:65535
 
 // GFX90A: ds_rsub_u32 v1, a2              ; encoding: [0x00,0x00,0x04,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_rsub_u32 v1, a2
 
 // GFX90A: ds_rsub_u32 v1, a2              ; encoding: [0x00,0x00,0x04,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_rsub_u32 v1, a2
 
 // GFX90A: ds_rsub_u32 v1, a2 offset:4     ; encoding: [0x04,0x00,0x04,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_rsub_u32 v1, a2 offset:4
 
 // GFX90A: ds_inc_u32 v1, a2 offset:65535  ; encoding: [0xff,0xff,0x06,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_inc_u32 v1, a2 offset:65535
 
 // GFX90A: ds_inc_u32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x06,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_inc_u32 v255, a2 offset:65535
 
 // GFX90A: ds_inc_u32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x06,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_inc_u32 v1, a255 offset:65535
 
 // GFX90A: ds_inc_u32 v1, a2               ; encoding: [0x00,0x00,0x06,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_inc_u32 v1, a2
 
 // GFX90A: ds_inc_u32 v1, a2               ; encoding: [0x00,0x00,0x06,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_inc_u32 v1, a2
 
 // GFX90A: ds_inc_u32 v1, a2 offset:4      ; encoding: [0x04,0x00,0x06,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_inc_u32 v1, a2 offset:4
 
 // GFX90A: ds_dec_u32 v1, a2 offset:65535  ; encoding: [0xff,0xff,0x08,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_dec_u32 v1, a2 offset:65535
 
 // GFX90A: ds_dec_u32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x08,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_dec_u32 v255, a2 offset:65535
 
 // GFX90A: ds_dec_u32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x08,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_dec_u32 v1, a255 offset:65535
 
 // GFX90A: ds_dec_u32 v1, a2               ; encoding: [0x00,0x00,0x08,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_dec_u32 v1, a2
 
 // GFX90A: ds_dec_u32 v1, a2               ; encoding: [0x00,0x00,0x08,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_dec_u32 v1, a2
 
 // GFX90A: ds_dec_u32 v1, a2 offset:4      ; encoding: [0x04,0x00,0x08,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_dec_u32 v1, a2 offset:4
 
 // GFX90A: ds_min_i32 v1, a2 offset:65535  ; encoding: [0xff,0xff,0x0a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_i32 v1, a2 offset:65535
 
 // GFX90A: ds_min_i32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x0a,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_i32 v255, a2 offset:65535
 
 // GFX90A: ds_min_i32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x0a,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_i32 v1, a255 offset:65535
 
 // GFX90A: ds_min_i32 v1, a2               ; encoding: [0x00,0x00,0x0a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_i32 v1, a2
 
 // GFX90A: ds_min_i32 v1, a2               ; encoding: [0x00,0x00,0x0a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_i32 v1, a2
 
 // GFX90A: ds_min_i32 v1, a2 offset:4      ; encoding: [0x04,0x00,0x0a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_i32 v1, a2 offset:4
 
 // GFX90A: ds_max_i32 v1, a2 offset:65535  ; encoding: [0xff,0xff,0x0c,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_i32 v1, a2 offset:65535
 
 // GFX90A: ds_max_i32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x0c,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_i32 v255, a2 offset:65535
 
 // GFX90A: ds_max_i32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x0c,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_i32 v1, a255 offset:65535
 
 // GFX90A: ds_max_i32 v1, a2               ; encoding: [0x00,0x00,0x0c,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_i32 v1, a2
 
 // GFX90A: ds_max_i32 v1, a2               ; encoding: [0x00,0x00,0x0c,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_i32 v1, a2
 
 // GFX90A: ds_max_i32 v1, a2 offset:4      ; encoding: [0x04,0x00,0x0c,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_i32 v1, a2 offset:4
 
 // GFX90A: ds_min_u32 v1, a2 offset:65535  ; encoding: [0xff,0xff,0x0e,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_u32 v1, a2 offset:65535
 
 // GFX90A: ds_min_u32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x0e,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_u32 v255, a2 offset:65535
 
 // GFX90A: ds_min_u32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x0e,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_u32 v1, a255 offset:65535
 
 // GFX90A: ds_min_u32 v1, a2               ; encoding: [0x00,0x00,0x0e,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_u32 v1, a2
 
 // GFX90A: ds_min_u32 v1, a2               ; encoding: [0x00,0x00,0x0e,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_u32 v1, a2
 
 // GFX90A: ds_min_u32 v1, a2 offset:4      ; encoding: [0x04,0x00,0x0e,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_u32 v1, a2 offset:4
 
 // GFX90A: ds_max_u32 v1, a2 offset:65535  ; encoding: [0xff,0xff,0x10,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_u32 v1, a2 offset:65535
 
 // GFX90A: ds_max_u32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x10,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_u32 v255, a2 offset:65535
 
 // GFX90A: ds_max_u32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x10,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_u32 v1, a255 offset:65535
 
 // GFX90A: ds_max_u32 v1, a2               ; encoding: [0x00,0x00,0x10,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_u32 v1, a2
 
 // GFX90A: ds_max_u32 v1, a2               ; encoding: [0x00,0x00,0x10,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_u32 v1, a2
 
 // GFX90A: ds_max_u32 v1, a2 offset:4      ; encoding: [0x04,0x00,0x10,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_u32 v1, a2 offset:4
 
 // GFX90A: ds_and_b32 v1, a2 offset:65535  ; encoding: [0xff,0xff,0x12,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_and_b32 v1, a2 offset:65535
 
 // GFX90A: ds_and_b32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x12,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_and_b32 v255, a2 offset:65535
 
 // GFX90A: ds_and_b32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x12,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_and_b32 v1, a255 offset:65535
 
 // GFX90A: ds_and_b32 v1, a2               ; encoding: [0x00,0x00,0x12,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_and_b32 v1, a2
 
 // GFX90A: ds_and_b32 v1, a2               ; encoding: [0x00,0x00,0x12,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_and_b32 v1, a2
 
 // GFX90A: ds_and_b32 v1, a2 offset:4      ; encoding: [0x04,0x00,0x12,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_and_b32 v1, a2 offset:4
 
 // GFX90A: ds_or_b32 v1, a2 offset:65535   ; encoding: [0xff,0xff,0x14,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_or_b32 v1, a2 offset:65535
 
 // GFX90A: ds_or_b32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x14,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_or_b32 v255, a2 offset:65535
 
 // GFX90A: ds_or_b32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x14,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_or_b32 v1, a255 offset:65535
 
 // GFX90A: ds_or_b32 v1, a2                ; encoding: [0x00,0x00,0x14,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_or_b32 v1, a2
 
 // GFX90A: ds_or_b32 v1, a2                ; encoding: [0x00,0x00,0x14,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_or_b32 v1, a2
 
 // GFX90A: ds_or_b32 v1, a2 offset:4       ; encoding: [0x04,0x00,0x14,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_or_b32 v1, a2 offset:4
 
 // GFX90A: ds_xor_b32 v1, a2 offset:65535  ; encoding: [0xff,0xff,0x16,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_xor_b32 v1, a2 offset:65535
 
 // GFX90A: ds_xor_b32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x16,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_xor_b32 v255, a2 offset:65535
 
 // GFX90A: ds_xor_b32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x16,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_xor_b32 v1, a255 offset:65535
 
 // GFX90A: ds_xor_b32 v1, a2               ; encoding: [0x00,0x00,0x16,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_xor_b32 v1, a2
 
 // GFX90A: ds_xor_b32 v1, a2               ; encoding: [0x00,0x00,0x16,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_xor_b32 v1, a2
 
 // GFX90A: ds_xor_b32 v1, a2 offset:4      ; encoding: [0x04,0x00,0x16,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_xor_b32 v1, a2 offset:4
 
 // GFX90A: ds_mskor_b32 v1, a2, a3 offset:65535 ; encoding: [0xff,0xff,0x18,0xda,0x01,0x02,0x03,0x00]
@@ -7122,27 +7122,27 @@ ds_mskor_b32 v1, a2, a3
 ds_mskor_b32 v1, a2, a3 offset:4
 
 // GFX90A: ds_write_b32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x1a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b32 v1, a2 offset:65535
 
 // GFX90A: ds_write_b32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x1a,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b32 v255, a2 offset:65535
 
 // GFX90A: ds_write_b32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x1a,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b32 v1, a255 offset:65535
 
 // GFX90A: ds_write_b32 v1, a2             ; encoding: [0x00,0x00,0x1a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b32 v1, a2
 
 // GFX90A: ds_write_b32 v1, a2             ; encoding: [0x00,0x00,0x1a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b32 v1, a2
 
 // GFX90A: ds_write_b32 v1, a2 offset:4    ; encoding: [0x04,0x00,0x1a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b32 v1, a2 offset:4
 
 // GFX90A: ds_write2_b32 v1, a2, a3 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x1c,0xda,0x01,0x02,0x03,0x00]
@@ -7282,123 +7282,123 @@ ds_cmpst_f32 v1, a2, a3
 ds_cmpst_f32 v1, a2, a3 offset:4
 
 // GFX90A: ds_min_f32 v1, a2 offset:65535  ; encoding: [0xff,0xff,0x24,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_f32 v1, a2 offset:65535
 
 // GFX90A: ds_min_f32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x24,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_f32 v255, a2 offset:65535
 
 // GFX90A: ds_min_f32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x24,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_f32 v1, a255 offset:65535
 
 // GFX90A: ds_min_f32 v1, a2               ; encoding: [0x00,0x00,0x24,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_f32 v1, a2
 
 // GFX90A: ds_min_f32 v1, a2               ; encoding: [0x00,0x00,0x24,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_f32 v1, a2
 
 // GFX90A: ds_min_f32 v1, a2 offset:4      ; encoding: [0x04,0x00,0x24,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_f32 v1, a2 offset:4
 
 // GFX90A: ds_max_f32 v1, a2 offset:65535  ; encoding: [0xff,0xff,0x26,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_f32 v1, a2 offset:65535
 
 // GFX90A: ds_max_f32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x26,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_f32 v255, a2 offset:65535
 
 // GFX90A: ds_max_f32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x26,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_f32 v1, a255 offset:65535
 
 // GFX90A: ds_max_f32 v1, a2               ; encoding: [0x00,0x00,0x26,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_f32 v1, a2
 
 // GFX90A: ds_max_f32 v1, a2               ; encoding: [0x00,0x00,0x26,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_f32 v1, a2
 
 // GFX90A: ds_max_f32 v1, a2 offset:4      ; encoding: [0x04,0x00,0x26,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_f32 v1, a2 offset:4
 
 // GFX90A: ds_add_f32 v1, a2 offset:65535  ; encoding: [0xff,0xff,0x2a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_f32 v1, a2 offset:65535
 
 // GFX90A: ds_add_f32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x2a,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_f32 v255, a2 offset:65535
 
 // GFX90A: ds_add_f32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x2a,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_f32 v1, a255 offset:65535
 
 // GFX90A: ds_add_f32 v1, a2               ; encoding: [0x00,0x00,0x2a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_f32 v1, a2
 
 // GFX90A: ds_add_f32 v1, a2               ; encoding: [0x00,0x00,0x2a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_f32 v1, a2
 
 // GFX90A: ds_add_f32 v1, a2 offset:4      ; encoding: [0x04,0x00,0x2a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_f32 v1, a2 offset:4
 
 // GFX90A: ds_write_b8 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x3c,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b8 v1, a2 offset:65535
 
 // GFX90A: ds_write_b8 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x3c,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b8 v255, a2 offset:65535
 
 // GFX90A: ds_write_b8 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x3c,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b8 v1, a255 offset:65535
 
 // GFX90A: ds_write_b8 v1, a2              ; encoding: [0x00,0x00,0x3c,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b8 v1, a2
 
 // GFX90A: ds_write_b8 v1, a2              ; encoding: [0x00,0x00,0x3c,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b8 v1, a2
 
 // GFX90A: ds_write_b8 v1, a2 offset:4     ; encoding: [0x04,0x00,0x3c,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b8 v1, a2 offset:4
 
 // GFX90A: ds_write_b16 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x3e,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b16 v1, a2 offset:65535
 
 // GFX90A: ds_write_b16 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x3e,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b16 v255, a2 offset:65535
 
 // GFX90A: ds_write_b16 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x3e,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b16 v1, a255 offset:65535
 
 // GFX90A: ds_write_b16 v1, a2             ; encoding: [0x00,0x00,0x3e,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b16 v1, a2
 
 // GFX90A: ds_write_b16 v1, a2             ; encoding: [0x00,0x00,0x3e,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b16 v1, a2
 
 // GFX90A: ds_write_b16 v1, a2 offset:4    ; encoding: [0x04,0x00,0x3e,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b16 v1, a2 offset:4
 
 // GFX90A: ds_add_rtn_u32 a5, v1, a2 offset:65535 ; encoding: [0xff,0xff,0x40,0xda,0x01,0x02,0x00,0x05]
@@ -8066,219 +8066,219 @@ ds_add_rtn_f32 a5, v1, a2
 ds_add_rtn_f32 a5, v1, a2 offset:4
 
 // GFX90A: ds_read_b32 a5, v1 offset:65535 ; encoding: [0xff,0xff,0x6c,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b32 a5, v1 offset:65535
 
 // GFX90A: ds_read_b32 a255, v1 offset:65535 ; encoding: [0xff,0xff,0x6c,0xda,0x01,0x00,0x00,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b32 a255, v1 offset:65535
 
 // GFX90A: ds_read_b32 a5, v255 offset:65535 ; encoding: [0xff,0xff,0x6c,0xda,0xff,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b32 a5, v255 offset:65535
 
 // GFX90A: ds_read_b32 a5, v1              ; encoding: [0x00,0x00,0x6c,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b32 a5, v1
 
 // GFX90A: ds_read_b32 a5, v1              ; encoding: [0x00,0x00,0x6c,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b32 a5, v1
 
 // GFX90A: ds_read_b32 a5, v1 offset:4     ; encoding: [0x04,0x00,0x6c,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b32 a5, v1 offset:4
 
 // GFX90A: ds_read2_b32 a[6:7], v1 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x6e,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b32 a[6:7], v1 offset0:127 offset1:255
 
 // GFX90A: ds_read2_b32 a[254:255], v1 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x6e,0xda,0x01,0x00,0x00,0xfe]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b32 a[254:255], v1 offset0:127 offset1:255
 
 // GFX90A: ds_read2_b32 a[6:7], v255 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x6e,0xda,0xff,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b32 a[6:7], v255 offset0:127 offset1:255
 
 // GFX90A: ds_read2_b32 a[6:7], v1 offset1:255 ; encoding: [0x00,0xff,0x6e,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b32 a[6:7], v1 offset1:255
 
 // GFX90A: ds_read2_b32 a[6:7], v1 offset1:255 ; encoding: [0x00,0xff,0x6e,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b32 a[6:7], v1 offset1:255
 
 // GFX90A: ds_read2_b32 a[6:7], v1 offset0:16 offset1:255 ; encoding: [0x10,0xff,0x6e,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b32 a[6:7], v1 offset0:16 offset1:255
 
 // GFX90A: ds_read2_b32 a[6:7], v1 offset0:127 ; encoding: [0x7f,0x00,0x6e,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b32 a[6:7], v1 offset0:127
 
 // GFX90A: ds_read2_b32 a[6:7], v1 offset0:127 ; encoding: [0x7f,0x00,0x6e,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b32 a[6:7], v1 offset0:127
 
 // GFX90A: ds_read2_b32 a[6:7], v1 offset0:127 offset1:1 ; encoding: [0x7f,0x01,0x6e,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b32 a[6:7], v1 offset0:127 offset1:1
 
 // GFX90A: ds_read2st64_b32 a[6:7], v1 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x70,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b32 a[6:7], v1 offset0:127 offset1:255
 
 // GFX90A: ds_read2st64_b32 a[254:255], v1 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x70,0xda,0x01,0x00,0x00,0xfe]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b32 a[254:255], v1 offset0:127 offset1:255
 
 // GFX90A: ds_read2st64_b32 a[6:7], v255 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x70,0xda,0xff,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b32 a[6:7], v255 offset0:127 offset1:255
 
 // GFX90A: ds_read2st64_b32 a[6:7], v1 offset1:255 ; encoding: [0x00,0xff,0x70,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b32 a[6:7], v1 offset1:255
 
 // GFX90A: ds_read2st64_b32 a[6:7], v1 offset1:255 ; encoding: [0x00,0xff,0x70,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b32 a[6:7], v1 offset1:255
 
 // GFX90A: ds_read2st64_b32 a[6:7], v1 offset0:16 offset1:255 ; encoding: [0x10,0xff,0x70,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b32 a[6:7], v1 offset0:16 offset1:255
 
 // GFX90A: ds_read2st64_b32 a[6:7], v1 offset0:127 ; encoding: [0x7f,0x00,0x70,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b32 a[6:7], v1 offset0:127
 
 // GFX90A: ds_read2st64_b32 a[6:7], v1 offset0:127 ; encoding: [0x7f,0x00,0x70,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b32 a[6:7], v1 offset0:127
 
 // GFX90A: ds_read2st64_b32 a[6:7], v1 offset0:127 offset1:1 ; encoding: [0x7f,0x01,0x70,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b32 a[6:7], v1 offset0:127 offset1:1
 
 // GFX90A: ds_read_i8 a5, v1 offset:65535  ; encoding: [0xff,0xff,0x72,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8 a5, v1 offset:65535
 
 // GFX90A: ds_read_i8 a255, v1 offset:65535 ; encoding: [0xff,0xff,0x72,0xda,0x01,0x00,0x00,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8 a255, v1 offset:65535
 
 // GFX90A: ds_read_i8 a5, v255 offset:65535 ; encoding: [0xff,0xff,0x72,0xda,0xff,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8 a5, v255 offset:65535
 
 // GFX90A: ds_read_i8 a5, v1               ; encoding: [0x00,0x00,0x72,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8 a5, v1
 
 // GFX90A: ds_read_i8 a5, v1               ; encoding: [0x00,0x00,0x72,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8 a5, v1
 
 // GFX90A: ds_read_i8 a5, v1 offset:4      ; encoding: [0x04,0x00,0x72,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8 a5, v1 offset:4
 
 // GFX90A: ds_read_u8 a5, v1 offset:65535  ; encoding: [0xff,0xff,0x74,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8 a5, v1 offset:65535
 
 // GFX90A: ds_read_u8 a255, v1 offset:65535 ; encoding: [0xff,0xff,0x74,0xda,0x01,0x00,0x00,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8 a255, v1 offset:65535
 
 // GFX90A: ds_read_u8 a5, v255 offset:65535 ; encoding: [0xff,0xff,0x74,0xda,0xff,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8 a5, v255 offset:65535
 
 // GFX90A: ds_read_u8 a5, v1               ; encoding: [0x00,0x00,0x74,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8 a5, v1
 
 // GFX90A: ds_read_u8 a5, v1               ; encoding: [0x00,0x00,0x74,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8 a5, v1
 
 // GFX90A: ds_read_u8 a5, v1 offset:4      ; encoding: [0x04,0x00,0x74,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8 a5, v1 offset:4
 
 // GFX90A: ds_read_i16 a5, v1 offset:65535 ; encoding: [0xff,0xff,0x76,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i16 a5, v1 offset:65535
 
 // GFX90A: ds_read_i16 a255, v1 offset:65535 ; encoding: [0xff,0xff,0x76,0xda,0x01,0x00,0x00,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i16 a255, v1 offset:65535
 
 // GFX90A: ds_read_i16 a5, v255 offset:65535 ; encoding: [0xff,0xff,0x76,0xda,0xff,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i16 a5, v255 offset:65535
 
 // GFX90A: ds_read_i16 a5, v1              ; encoding: [0x00,0x00,0x76,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i16 a5, v1
 
 // GFX90A: ds_read_i16 a5, v1              ; encoding: [0x00,0x00,0x76,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i16 a5, v1
 
 // GFX90A: ds_read_i16 a5, v1 offset:4     ; encoding: [0x04,0x00,0x76,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i16 a5, v1 offset:4
 
 // GFX90A: ds_read_u16 a5, v1 offset:65535 ; encoding: [0xff,0xff,0x78,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16 a5, v1 offset:65535
 
 // GFX90A: ds_read_u16 a255, v1 offset:65535 ; encoding: [0xff,0xff,0x78,0xda,0x01,0x00,0x00,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16 a255, v1 offset:65535
 
 // GFX90A: ds_read_u16 a5, v255 offset:65535 ; encoding: [0xff,0xff,0x78,0xda,0xff,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16 a5, v255 offset:65535
 
 // GFX90A: ds_read_u16 a5, v1              ; encoding: [0x00,0x00,0x78,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16 a5, v1
 
 // GFX90A: ds_read_u16 a5, v1              ; encoding: [0x00,0x00,0x78,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16 a5, v1
 
 // GFX90A: ds_read_u16 a5, v1 offset:4     ; encoding: [0x04,0x00,0x78,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16 a5, v1 offset:4
 
 // GFX90A: ds_swizzle_b32 a5, v1 offset:swizzle(FFT,31) ; encoding: [0xff,0xff,0x7a,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_swizzle_b32 a5, v1 offset:65535
 
 // GFX90A: ds_swizzle_b32 a255, v1 offset:swizzle(FFT,31) ; encoding: [0xff,0xff,0x7a,0xda,0x01,0x00,0x00,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_swizzle_b32 a255, v1 offset:65535
 
 // GFX90A: ds_swizzle_b32 a5, v255 offset:swizzle(FFT,31) ; encoding: [0xff,0xff,0x7a,0xda,0xff,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_swizzle_b32 a5, v255 offset:65535
 
 // GFX90A: ds_swizzle_b32 a5, v1           ; encoding: [0x00,0x00,0x7a,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_swizzle_b32 a5, v1
 
 // GFX90A: ds_swizzle_b32 a5, v1           ; encoding: [0x00,0x00,0x7a,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_swizzle_b32 a5, v1
 
 // GFX90A: ds_swizzle_b32 a5, v1 offset:swizzle(BITMASK_PERM,"00p00") ; encoding: [0x04,0x00,0x7a,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_swizzle_b32 a5, v1 offset:swizzle(BITMASK_PERM,"00p00")
 
 // GFX90A: ds_permute_b32 a5, v1, a2 offset:65535 ; encoding: [0xff,0xff,0x7c,0xda,0x01,0x02,0x00,0x05]
@@ -8338,291 +8338,291 @@ ds_bpermute_b32 a5, v1, a2
 ds_bpermute_b32 a5, v1, a2 offset:4
 
 // GFX90A: ds_add_u64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x80,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_u64 v1, a[2:3] offset:65535
 
 // GFX90A: ds_add_u64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x80,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_u64 v255, a[2:3] offset:65535
 
 // GFX90A: ds_add_u64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x80,0xda,0x01,0xfe,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_u64 v1, a[254:255] offset:65535
 
 // GFX90A: ds_add_u64 v1, a[2:3]           ; encoding: [0x00,0x00,0x80,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_u64 v1, a[2:3]
 
 // GFX90A: ds_add_u64 v1, a[2:3]           ; encoding: [0x00,0x00,0x80,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_u64 v1, a[2:3]
 
 // GFX90A: ds_add_u64 v1, a[2:3] offset:4  ; encoding: [0x04,0x00,0x80,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_add_u64 v1, a[2:3] offset:4
 
 // GFX90A: ds_sub_u64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x82,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_sub_u64 v1, a[2:3] offset:65535
 
 // GFX90A: ds_sub_u64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x82,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_sub_u64 v255, a[2:3] offset:65535
 
 // GFX90A: ds_sub_u64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x82,0xda,0x01,0xfe,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_sub_u64 v1, a[254:255] offset:65535
 
 // GFX90A: ds_sub_u64 v1, a[2:3]           ; encoding: [0x00,0x00,0x82,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_sub_u64 v1, a[2:3]
 
 // GFX90A: ds_sub_u64 v1, a[2:3]           ; encoding: [0x00,0x00,0x82,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_sub_u64 v1, a[2:3]
 
 // GFX90A: ds_sub_u64 v1, a[2:3] offset:4  ; encoding: [0x04,0x00,0x82,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_sub_u64 v1, a[2:3] offset:4
 
 // GFX90A: ds_rsub_u64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x84,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_rsub_u64 v1, a[2:3] offset:65535
 
 // GFX90A: ds_rsub_u64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x84,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_rsub_u64 v255, a[2:3] offset:65535
 
 // GFX90A: ds_rsub_u64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x84,0xda,0x01,0xfe,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_rsub_u64 v1, a[254:255] offset:65535
 
 // GFX90A: ds_rsub_u64 v1, a[2:3]          ; encoding: [0x00,0x00,0x84,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_rsub_u64 v1, a[2:3]
 
 // GFX90A: ds_rsub_u64 v1, a[2:3]          ; encoding: [0x00,0x00,0x84,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_rsub_u64 v1, a[2:3]
 
 // GFX90A: ds_rsub_u64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0x84,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_rsub_u64 v1, a[2:3] offset:4
 
 // GFX90A: ds_inc_u64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x86,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_inc_u64 v1, a[2:3] offset:65535
 
 // GFX90A: ds_inc_u64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x86,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_inc_u64 v255, a[2:3] offset:65535
 
 // GFX90A: ds_inc_u64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x86,0xda,0x01,0xfe,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_inc_u64 v1, a[254:255] offset:65535
 
 // GFX90A: ds_inc_u64 v1, a[2:3]           ; encoding: [0x00,0x00,0x86,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_inc_u64 v1, a[2:3]
 
 // GFX90A: ds_inc_u64 v1, a[2:3]           ; encoding: [0x00,0x00,0x86,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_inc_u64 v1, a[2:3]
 
 // GFX90A: ds_inc_u64 v1, a[2:3] offset:4  ; encoding: [0x04,0x00,0x86,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_inc_u64 v1, a[2:3] offset:4
 
 // GFX90A: ds_dec_u64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x88,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_dec_u64 v1, a[2:3] offset:65535
 
 // GFX90A: ds_dec_u64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x88,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_dec_u64 v255, a[2:3] offset:65535
 
 // GFX90A: ds_dec_u64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x88,0xda,0x01,0xfe,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_dec_u64 v1, a[254:255] offset:65535
 
 // GFX90A: ds_dec_u64 v1, a[2:3]           ; encoding: [0x00,0x00,0x88,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_dec_u64 v1, a[2:3]
 
 // GFX90A: ds_dec_u64 v1, a[2:3]           ; encoding: [0x00,0x00,0x88,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_dec_u64 v1, a[2:3]
 
 // GFX90A: ds_dec_u64 v1, a[2:3] offset:4  ; encoding: [0x04,0x00,0x88,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_dec_u64 v1, a[2:3] offset:4
 
 // GFX90A: ds_min_i64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x8a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_i64 v1, a[2:3] offset:65535
 
 // GFX90A: ds_min_i64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x8a,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_i64 v255, a[2:3] offset:65535
 
 // GFX90A: ds_min_i64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x8a,0xda,0x01,0xfe,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_i64 v1, a[254:255] offset:65535
 
 // GFX90A: ds_min_i64 v1, a[2:3]           ; encoding: [0x00,0x00,0x8a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_i64 v1, a[2:3]
 
 // GFX90A: ds_min_i64 v1, a[2:3]           ; encoding: [0x00,0x00,0x8a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_i64 v1, a[2:3]
 
 // GFX90A: ds_min_i64 v1, a[2:3] offset:4  ; encoding: [0x04,0x00,0x8a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_i64 v1, a[2:3] offset:4
 
 // GFX90A: ds_max_i64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x8c,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_i64 v1, a[2:3] offset:65535
 
 // GFX90A: ds_max_i64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x8c,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_i64 v255, a[2:3] offset:65535
 
 // GFX90A: ds_max_i64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x8c,0xda,0x01,0xfe,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_i64 v1, a[254:255] offset:65535
 
 // GFX90A: ds_max_i64 v1, a[2:3]           ; encoding: [0x00,0x00,0x8c,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_i64 v1, a[2:3]
 
 // GFX90A: ds_max_i64 v1, a[2:3]           ; encoding: [0x00,0x00,0x8c,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_i64 v1, a[2:3]
 
 // GFX90A: ds_max_i64 v1, a[2:3] offset:4  ; encoding: [0x04,0x00,0x8c,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_i64 v1, a[2:3] offset:4
 
 // GFX90A: ds_min_u64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x8e,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_u64 v1, a[2:3] offset:65535
 
 // GFX90A: ds_min_u64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x8e,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_u64 v255, a[2:3] offset:65535
 
 // GFX90A: ds_min_u64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x8e,0xda,0x01,0xfe,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_u64 v1, a[254:255] offset:65535
 
 // GFX90A: ds_min_u64 v1, a[2:3]           ; encoding: [0x00,0x00,0x8e,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_u64 v1, a[2:3]
 
 // GFX90A: ds_min_u64 v1, a[2:3]           ; encoding: [0x00,0x00,0x8e,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_u64 v1, a[2:3]
 
 // GFX90A: ds_min_u64 v1, a[2:3] offset:4  ; encoding: [0x04,0x00,0x8e,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_u64 v1, a[2:3] offset:4
 
 // GFX90A: ds_max_u64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x90,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_u64 v1, a[2:3] offset:65535
 
 // GFX90A: ds_max_u64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x90,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_u64 v255, a[2:3] offset:65535
 
 // GFX90A: ds_max_u64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x90,0xda,0x01,0xfe,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_u64 v1, a[254:255] offset:65535
 
 // GFX90A: ds_max_u64 v1, a[2:3]           ; encoding: [0x00,0x00,0x90,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_u64 v1, a[2:3]
 
 // GFX90A: ds_max_u64 v1, a[2:3]           ; encoding: [0x00,0x00,0x90,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_u64 v1, a[2:3]
 
 // GFX90A: ds_max_u64 v1, a[2:3] offset:4  ; encoding: [0x04,0x00,0x90,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_u64 v1, a[2:3] offset:4
 
 // GFX90A: ds_and_b64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x92,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_and_b64 v1, a[2:3] offset:65535
 
 // GFX90A: ds_and_b64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x92,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_and_b64 v255, a[2:3] offset:65535
 
 // GFX90A: ds_and_b64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x92,0xda,0x01,0xfe,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_and_b64 v1, a[254:255] offset:65535
 
 // GFX90A: ds_and_b64 v1, a[2:3]           ; encoding: [0x00,0x00,0x92,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_and_b64 v1, a[2:3]
 
 // GFX90A: ds_and_b64 v1, a[2:3]           ; encoding: [0x00,0x00,0x92,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_and_b64 v1, a[2:3]
 
 // GFX90A: ds_and_b64 v1, a[2:3] offset:4  ; encoding: [0x04,0x00,0x92,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_and_b64 v1, a[2:3] offset:4
 
 // GFX90A: ds_or_b64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x94,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_or_b64 v1, a[2:3] offset:65535
 
 // GFX90A: ds_or_b64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x94,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_or_b64 v255, a[2:3] offset:65535
 
 // GFX90A: ds_or_b64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x94,0xda,0x01,0xfe,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_or_b64 v1, a[254:255] offset:65535
 
 // GFX90A: ds_or_b64 v1, a[2:3]            ; encoding: [0x00,0x00,0x94,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_or_b64 v1, a[2:3]
 
 // GFX90A: ds_or_b64 v1, a[2:3]            ; encoding: [0x00,0x00,0x94,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_or_b64 v1, a[2:3]
 
 // GFX90A: ds_or_b64 v1, a[2:3] offset:4   ; encoding: [0x04,0x00,0x94,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_or_b64 v1, a[2:3] offset:4
 
 // GFX90A: ds_xor_b64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x96,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_xor_b64 v1, a[2:3] offset:65535
 
 // GFX90A: ds_xor_b64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x96,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_xor_b64 v255, a[2:3] offset:65535
 
 // GFX90A: ds_xor_b64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x96,0xda,0x01,0xfe,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_xor_b64 v1, a[254:255] offset:65535
 
 // GFX90A: ds_xor_b64 v1, a[2:3]           ; encoding: [0x00,0x00,0x96,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_xor_b64 v1, a[2:3]
 
 // GFX90A: ds_xor_b64 v1, a[2:3]           ; encoding: [0x00,0x00,0x96,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_xor_b64 v1, a[2:3]
 
 // GFX90A: ds_xor_b64 v1, a[2:3] offset:4  ; encoding: [0x04,0x00,0x96,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_xor_b64 v1, a[2:3] offset:4
 
 // GFX90A: ds_mskor_b64 v1, a[2:3], a[4:5] offset:65535 ; encoding: [0xff,0xff,0x98,0xda,0x01,0x02,0x04,0x00]
@@ -8654,27 +8654,27 @@ ds_mskor_b64 v1, a[2:3], a[4:5]
 ds_mskor_b64 v1, a[2:3], a[4:5] offset:4
 
 // GFX90A: ds_write_b64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x9a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b64 v1, a[2:3] offset:65535
 
 // GFX90A: ds_write_b64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x9a,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b64 v255, a[2:3] offset:65535
 
 // GFX90A: ds_write_b64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x9a,0xda,0x01,0xfe,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b64 v1, a[254:255] offset:65535
 
 // GFX90A: ds_write_b64 v1, a[2:3]         ; encoding: [0x00,0x00,0x9a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b64 v1, a[2:3]
 
 // GFX90A: ds_write_b64 v1, a[2:3]         ; encoding: [0x00,0x00,0x9a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b64 v1, a[2:3]
 
 // GFX90A: ds_write_b64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0x9a,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b64 v1, a[2:3] offset:4
 
 // GFX90A: ds_write2_b64 v1, a[2:3], a[4:5] offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x9c,0xda,0x01,0x02,0x04,0x00]
@@ -8814,243 +8814,243 @@ ds_cmpst_f64 v1, a[2:3], a[4:5]
 ds_cmpst_f64 v1, a[2:3], a[4:5] offset:4
 
 // GFX90A: ds_min_f64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0xa4,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_f64 v1, a[2:3] offset:65535
 
 // GFX90A: ds_min_f64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0xa4,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_f64 v255, a[2:3] offset:65535
 
 // GFX90A: ds_min_f64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0xa4,0xda,0x01,0xfe,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_f64 v1, a[254:255] offset:65535
 
 // GFX90A: ds_min_f64 v1, a[2:3]           ; encoding: [0x00,0x00,0xa4,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_f64 v1, a[2:3]
 
 // GFX90A: ds_min_f64 v1, a[2:3]           ; encoding: [0x00,0x00,0xa4,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_f64 v1, a[2:3]
 
 // GFX90A: ds_min_f64 v1, a[2:3] offset:4  ; encoding: [0x04,0x00,0xa4,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_min_f64 v1, a[2:3] offset:4
 
 // GFX90A: ds_max_f64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0xa6,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_f64 v1, a[2:3] offset:65535
 
 // GFX90A: ds_max_f64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0xa6,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_f64 v255, a[2:3] offset:65535
 
 // GFX90A: ds_max_f64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0xa6,0xda,0x01,0xfe,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_f64 v1, a[254:255] offset:65535
 
 // GFX90A: ds_max_f64 v1, a[2:3]           ; encoding: [0x00,0x00,0xa6,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_f64 v1, a[2:3]
 
 // GFX90A: ds_max_f64 v1, a[2:3]           ; encoding: [0x00,0x00,0xa6,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_f64 v1, a[2:3]
 
 // GFX90A: ds_max_f64 v1, a[2:3] offset:4  ; encoding: [0x04,0x00,0xa6,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_max_f64 v1, a[2:3] offset:4
 
 // GFX90A: ds_write_b8_d16_hi v1, a2 offset:65535 ; encoding: [0xff,0xff,0xa8,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b8_d16_hi v1, a2 offset:65535
 
 // GFX90A: ds_write_b8_d16_hi v255, a2 offset:65535 ; encoding: [0xff,0xff,0xa8,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b8_d16_hi v255, a2 offset:65535
 
 // GFX90A: ds_write_b8_d16_hi v1, a255 offset:65535 ; encoding: [0xff,0xff,0xa8,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b8_d16_hi v1, a255 offset:65535
 
 // GFX90A: ds_write_b8_d16_hi v1, a2       ; encoding: [0x00,0x00,0xa8,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b8_d16_hi v1, a2
 
 // GFX90A: ds_write_b8_d16_hi v1, a2       ; encoding: [0x00,0x00,0xa8,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b8_d16_hi v1, a2
 
 // GFX90A: ds_write_b8_d16_hi v1, a2 offset:4 ; encoding: [0x04,0x00,0xa8,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b8_d16_hi v1, a2 offset:4
 
 // GFX90A: ds_write_b16_d16_hi v1, a2 offset:65535 ; encoding: [0xff,0xff,0xaa,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b16_d16_hi v1, a2 offset:65535
 
 // GFX90A: ds_write_b16_d16_hi v255, a2 offset:65535 ; encoding: [0xff,0xff,0xaa,0xda,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b16_d16_hi v255, a2 offset:65535
 
 // GFX90A: ds_write_b16_d16_hi v1, a255 offset:65535 ; encoding: [0xff,0xff,0xaa,0xda,0x01,0xff,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b16_d16_hi v1, a255 offset:65535
 
 // GFX90A: ds_write_b16_d16_hi v1, a2      ; encoding: [0x00,0x00,0xaa,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b16_d16_hi v1, a2
 
 // GFX90A: ds_write_b16_d16_hi v1, a2      ; encoding: [0x00,0x00,0xaa,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b16_d16_hi v1, a2
 
 // GFX90A: ds_write_b16_d16_hi v1, a2 offset:4 ; encoding: [0x04,0x00,0xaa,0xda,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b16_d16_hi v1, a2 offset:4
 
 // GFX90A: ds_read_u8_d16 a5, v1 offset:65535 ; encoding: [0xff,0xff,0xac,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8_d16 a5, v1 offset:65535
 
 // GFX90A: ds_read_u8_d16 a255, v1 offset:65535 ; encoding: [0xff,0xff,0xac,0xda,0x01,0x00,0x00,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8_d16 a255, v1 offset:65535
 
 // GFX90A: ds_read_u8_d16 a5, v255 offset:65535 ; encoding: [0xff,0xff,0xac,0xda,0xff,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8_d16 a5, v255 offset:65535
 
 // GFX90A: ds_read_u8_d16 a5, v1           ; encoding: [0x00,0x00,0xac,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8_d16 a5, v1
 
 // GFX90A: ds_read_u8_d16 a5, v1           ; encoding: [0x00,0x00,0xac,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8_d16 a5, v1
 
 // GFX90A: ds_read_u8_d16 a5, v1 offset:4  ; encoding: [0x04,0x00,0xac,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8_d16 a5, v1 offset:4
 
 // GFX90A: ds_read_u8_d16_hi a5, v1 offset:65535 ; encoding: [0xff,0xff,0xae,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8_d16_hi a5, v1 offset:65535
 
 // GFX90A: ds_read_u8_d16_hi a255, v1 offset:65535 ; encoding: [0xff,0xff,0xae,0xda,0x01,0x00,0x00,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8_d16_hi a255, v1 offset:65535
 
 // GFX90A: ds_read_u8_d16_hi a5, v255 offset:65535 ; encoding: [0xff,0xff,0xae,0xda,0xff,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8_d16_hi a5, v255 offset:65535
 
 // GFX90A: ds_read_u8_d16_hi a5, v1        ; encoding: [0x00,0x00,0xae,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8_d16_hi a5, v1
 
 // GFX90A: ds_read_u8_d16_hi a5, v1        ; encoding: [0x00,0x00,0xae,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8_d16_hi a5, v1
 
 // GFX90A: ds_read_u8_d16_hi a5, v1 offset:4 ; encoding: [0x04,0x00,0xae,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u8_d16_hi a5, v1 offset:4
 
 // GFX90A: ds_read_i8_d16 a5, v1 offset:65535 ; encoding: [0xff,0xff,0xb0,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8_d16 a5, v1 offset:65535
 
 // GFX90A: ds_read_i8_d16 a255, v1 offset:65535 ; encoding: [0xff,0xff,0xb0,0xda,0x01,0x00,0x00,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8_d16 a255, v1 offset:65535
 
 // GFX90A: ds_read_i8_d16 a5, v255 offset:65535 ; encoding: [0xff,0xff,0xb0,0xda,0xff,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8_d16 a5, v255 offset:65535
 
 // GFX90A: ds_read_i8_d16 a5, v1           ; encoding: [0x00,0x00,0xb0,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8_d16 a5, v1
 
 // GFX90A: ds_read_i8_d16 a5, v1           ; encoding: [0x00,0x00,0xb0,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8_d16 a5, v1
 
 // GFX90A: ds_read_i8_d16 a5, v1 offset:4  ; encoding: [0x04,0x00,0xb0,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8_d16 a5, v1 offset:4
 
 // GFX90A: ds_read_i8_d16_hi a5, v1 offset:65535 ; encoding: [0xff,0xff,0xb2,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8_d16_hi a5, v1 offset:65535
 
 // GFX90A: ds_read_i8_d16_hi a255, v1 offset:65535 ; encoding: [0xff,0xff,0xb2,0xda,0x01,0x00,0x00,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8_d16_hi a255, v1 offset:65535
 
 // GFX90A: ds_read_i8_d16_hi a5, v255 offset:65535 ; encoding: [0xff,0xff,0xb2,0xda,0xff,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8_d16_hi a5, v255 offset:65535
 
 // GFX90A: ds_read_i8_d16_hi a5, v1        ; encoding: [0x00,0x00,0xb2,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8_d16_hi a5, v1
 
 // GFX90A: ds_read_i8_d16_hi a5, v1        ; encoding: [0x00,0x00,0xb2,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8_d16_hi a5, v1
 
 // GFX90A: ds_read_i8_d16_hi a5, v1 offset:4 ; encoding: [0x04,0x00,0xb2,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_i8_d16_hi a5, v1 offset:4
 
 // GFX90A: ds_read_u16_d16 a5, v1 offset:65535 ; encoding: [0xff,0xff,0xb4,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16_d16 a5, v1 offset:65535
 
 // GFX90A: ds_read_u16_d16 a255, v1 offset:65535 ; encoding: [0xff,0xff,0xb4,0xda,0x01,0x00,0x00,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16_d16 a255, v1 offset:65535
 
 // GFX90A: ds_read_u16_d16 a5, v255 offset:65535 ; encoding: [0xff,0xff,0xb4,0xda,0xff,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16_d16 a5, v255 offset:65535
 
 // GFX90A: ds_read_u16_d16 a5, v1          ; encoding: [0x00,0x00,0xb4,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16_d16 a5, v1
 
 // GFX90A: ds_read_u16_d16 a5, v1          ; encoding: [0x00,0x00,0xb4,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16_d16 a5, v1
 
 // GFX90A: ds_read_u16_d16 a5, v1 offset:4 ; encoding: [0x04,0x00,0xb4,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16_d16 a5, v1 offset:4
 
 // GFX90A: ds_read_u16_d16_hi a5, v1 offset:65535 ; encoding: [0xff,0xff,0xb6,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16_d16_hi a5, v1 offset:65535
 
 // GFX90A: ds_read_u16_d16_hi a255, v1 offset:65535 ; encoding: [0xff,0xff,0xb6,0xda,0x01,0x00,0x00,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16_d16_hi a255, v1 offset:65535
 
 // GFX90A: ds_read_u16_d16_hi a5, v255 offset:65535 ; encoding: [0xff,0xff,0xb6,0xda,0xff,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16_d16_hi a5, v255 offset:65535
 
 // GFX90A: ds_read_u16_d16_hi a5, v1       ; encoding: [0x00,0x00,0xb6,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16_d16_hi a5, v1
 
 // GFX90A: ds_read_u16_d16_hi a5, v1       ; encoding: [0x00,0x00,0xb6,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16_d16_hi a5, v1
 
 // GFX90A: ds_read_u16_d16_hi a5, v1 offset:4 ; encoding: [0x04,0x00,0xb6,0xda,0x01,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_u16_d16_hi a5, v1 offset:4
 
 // GFX90A: ds_add_rtn_u64 a[6:7], v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0xc0,0xda,0x01,0x02,0x00,0x06]
@@ -9658,99 +9658,99 @@ ds_max_rtn_f64 a[6:7], v1, a[2:3]
 ds_max_rtn_f64 a[6:7], v1, a[2:3] offset:4
 
 // GFX90A: ds_read_b64 a[6:7], v1 offset:65535 ; encoding: [0xff,0xff,0xec,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b64 a[6:7], v1 offset:65535
 
 // GFX90A: ds_read_b64 a[254:255], v1 offset:65535 ; encoding: [0xff,0xff,0xec,0xda,0x01,0x00,0x00,0xfe]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b64 a[254:255], v1 offset:65535
 
 // GFX90A: ds_read_b64 a[6:7], v255 offset:65535 ; encoding: [0xff,0xff,0xec,0xda,0xff,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b64 a[6:7], v255 offset:65535
 
 // GFX90A: ds_read_b64 a[6:7], v1          ; encoding: [0x00,0x00,0xec,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b64 a[6:7], v1
 
 // GFX90A: ds_read_b64 a[6:7], v1          ; encoding: [0x00,0x00,0xec,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b64 a[6:7], v1
 
 // GFX90A: ds_read_b64 a[6:7], v1 offset:4 ; encoding: [0x04,0x00,0xec,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b64 a[6:7], v1 offset:4
 
 // GFX90A: ds_read2_b64 a[6:9], v1 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xee,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b64 a[6:9], v1 offset0:127 offset1:255
 
 // GFX90A: ds_read2_b64 a[252:255], v1 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xee,0xda,0x01,0x00,0x00,0xfc]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b64 a[252:255], v1 offset0:127 offset1:255
 
 // GFX90A: ds_read2_b64 a[6:9], v255 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xee,0xda,0xff,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b64 a[6:9], v255 offset0:127 offset1:255
 
 // GFX90A: ds_read2_b64 a[6:9], v1 offset1:255 ; encoding: [0x00,0xff,0xee,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b64 a[6:9], v1 offset1:255
 
 // GFX90A: ds_read2_b64 a[6:9], v1 offset1:255 ; encoding: [0x00,0xff,0xee,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b64 a[6:9], v1 offset1:255
 
 // GFX90A: ds_read2_b64 a[6:9], v1 offset0:16 offset1:255 ; encoding: [0x10,0xff,0xee,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b64 a[6:9], v1 offset0:16 offset1:255
 
 // GFX90A: ds_read2_b64 a[6:9], v1 offset0:127 ; encoding: [0x7f,0x00,0xee,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b64 a[6:9], v1 offset0:127
 
 // GFX90A: ds_read2_b64 a[6:9], v1 offset0:127 ; encoding: [0x7f,0x00,0xee,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b64 a[6:9], v1 offset0:127
 
 // GFX90A: ds_read2_b64 a[6:9], v1 offset0:127 offset1:1 ; encoding: [0x7f,0x01,0xee,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2_b64 a[6:9], v1 offset0:127 offset1:1
 
 // GFX90A: ds_read2st64_b64 a[6:9], v1 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xf0,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b64 a[6:9], v1 offset0:127 offset1:255
 
 // GFX90A: ds_read2st64_b64 a[252:255], v1 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xf0,0xda,0x01,0x00,0x00,0xfc]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b64 a[252:255], v1 offset0:127 offset1:255
 
 // GFX90A: ds_read2st64_b64 a[6:9], v255 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xf0,0xda,0xff,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b64 a[6:9], v255 offset0:127 offset1:255
 
 // GFX90A: ds_read2st64_b64 a[6:9], v1 offset1:255 ; encoding: [0x00,0xff,0xf0,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b64 a[6:9], v1 offset1:255
 
 // GFX90A: ds_read2st64_b64 a[6:9], v1 offset1:255 ; encoding: [0x00,0xff,0xf0,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b64 a[6:9], v1 offset1:255
 
 // GFX90A: ds_read2st64_b64 a[6:9], v1 offset0:16 offset1:255 ; encoding: [0x10,0xff,0xf0,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b64 a[6:9], v1 offset0:16 offset1:255
 
 // GFX90A: ds_read2st64_b64 a[6:9], v1 offset0:127 ; encoding: [0x7f,0x00,0xf0,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b64 a[6:9], v1 offset0:127
 
 // GFX90A: ds_read2st64_b64 a[6:9], v1 offset0:127 ; encoding: [0x7f,0x00,0xf0,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b64 a[6:9], v1 offset0:127
 
 // GFX90A: ds_read2st64_b64 a[6:9], v1 offset0:127 offset1:1 ; encoding: [0x7f,0x01,0xf0,0xda,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read2st64_b64 a[6:9], v1 offset0:127 offset1:1
 
 // GFX90A: ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0xfc,0xda,0x01,0x02,0x00,0x06]
@@ -9782,921 +9782,921 @@ ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3]
 ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] offset:4
 
 // GFX90A: ds_gws_init a0 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x00,0x00,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_gws_init a0 offset:65535 gds
 
 // GFX90A: ds_gws_init a254 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xfe,0x00,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_gws_init a254 offset:65535 gds
 
 // GFX90A: ds_gws_init a2 gds ; encoding: [0x00,0x00,0x33,0xdb,0x02,0x00,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_gws_init a2 gds
 
 // GFX90A: ds_gws_init a0 gds ; encoding: [0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_gws_init a0 gds
 
 // GFX90A: ds_gws_init a0 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x00,0x00,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_gws_init a0 offset:4 gds
 
 // GFX90A: ds_gws_sema_br a2 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x02,0x00,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_gws_sema_br a2 offset:65535 gds
 
 // GFX90A: ds_gws_sema_br a254 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xfe,0x00,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_gws_sema_br a254 offset:65535 gds
 
 // GFX90A: ds_gws_sema_br a0 gds ; encoding: [0x00,0x00,0x37,0xdb,0x00,0x00,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_gws_sema_br a0 gds
 
 // GFX90A: ds_gws_sema_br a2 gds ; encoding: [0x00,0x00,0x37,0xdb,0x02,0x00,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_gws_sema_br a2 gds
 
 // GFX90A: ds_gws_sema_br a0 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x00,0x00,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_gws_sema_br a0 offset:4 gds
 
 // GFX90A: ds_gws_barrier a2 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x02,0x00,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_gws_barrier a2 offset:65535 gds
 
 // GFX90A: ds_gws_barrier a254 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xfe,0x00,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_gws_barrier a254 offset:65535 gds
 
 // GFX90A: ds_gws_barrier a0 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_gws_barrier a0 gds
 
 // GFX90A: ds_gws_barrier a2 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_gws_barrier a2 gds
 
 // GFX90A: ds_gws_barrier a0 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_gws_barrier a0 offset:4 gds
 
 // GFX90A: ds_consume a5 offset:65535      ; encoding: [0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_consume a5 offset:65535
 
 // GFX90A: ds_consume a255 offset:65535    ; encoding: [0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_consume a255 offset:65535
 
 // GFX90A: ds_consume a5                   ; encoding: [0x00,0x00,0x7a,0xdb,0x00,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_consume a5
 
 // GFX90A: ds_consume a5                   ; encoding: [0x00,0x00,0x7a,0xdb,0x00,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_consume a5
 
 // GFX90A: ds_consume a5 offset:4          ; encoding: [0x04,0x00,0x7a,0xdb,0x00,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_consume a5 offset:4
 
 // GFX90A: ds_append a5 offset:65535       ; encoding: [0xff,0xff,0x7c,0xdb,0x00,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_append a5 offset:65535
 
 // GFX90A: ds_append a255 offset:65535     ; encoding: [0xff,0xff,0x7c,0xdb,0x00,0x00,0x00,0xff]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_append a255 offset:65535
 
 // GFX90A: ds_append a5                    ; encoding: [0x00,0x00,0x7c,0xdb,0x00,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_append a5
 
 // GFX90A: ds_append a5                    ; encoding: [0x00,0x00,0x7c,0xdb,0x00,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_append a5
 
 // GFX90A: ds_append a5 offset:4           ; encoding: [0x04,0x00,0x7c,0xdb,0x00,0x00,0x00,0x05]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_append a5 offset:4
 
 // GFX90A: ds_write_b96 v1, a[2:4] offset:65535 ; encoding: [0xff,0xff,0xbc,0xdb,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b96 v1, a[2:4] offset:65535
 
 // GFX90A: ds_write_b96 v255, a[2:4] offset:65535 ; encoding: [0xff,0xff,0xbc,0xdb,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b96 v255, a[2:4] offset:65535
 
 // GFX90A: ds_write_b96 v1, a[252:254] offset:65535 ; encoding: [0xff,0xff,0xbc,0xdb,0x01,0xfc,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b96 v1, a[252:254] offset:65535
 
 // GFX90A: ds_write_b96 v1, a[2:4]         ; encoding: [0x00,0x00,0xbc,0xdb,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b96 v1, a[2:4]
 
 // GFX90A: ds_write_b96 v1, a[2:4]         ; encoding: [0x00,0x00,0xbc,0xdb,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b96 v1, a[2:4]
 
 // GFX90A: ds_write_b96 v1, a[2:4] offset:4 ; encoding: [0x04,0x00,0xbc,0xdb,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b96 v1, a[2:4] offset:4
 
 // GFX90A: ds_write_b128 v1, a[2:5] offset:65535 ; encoding: [0xff,0xff,0xbe,0xdb,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b128 v1, a[2:5] offset:65535
 
 // GFX90A: ds_write_b128 v255, a[2:5] offset:65535 ; encoding: [0xff,0xff,0xbe,0xdb,0xff,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b128 v255, a[2:5] offset:65535
 
 // GFX90A: ds_write_b128 v1, a[252:255] offset:65535 ; encoding: [0xff,0xff,0xbe,0xdb,0x01,0xfc,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b128 v1, a[252:255] offset:65535
 
 // GFX90A: ds_write_b128 v1, a[2:5]        ; encoding: [0x00,0x00,0xbe,0xdb,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b128 v1, a[2:5]
 
 // GFX90A: ds_write_b128 v1, a[2:5]        ; encoding: [0x00,0x00,0xbe,0xdb,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b128 v1, a[2:5]
 
 // GFX90A: ds_write_b128 v1, a[2:5] offset:4 ; encoding: [0x04,0x00,0xbe,0xdb,0x01,0x02,0x00,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_write_b128 v1, a[2:5] offset:4
 
 // GFX90A: ds_read_b96 a[6:8], v1 offset:65535 ; encoding: [0xff,0xff,0xfc,0xdb,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b96 a[6:8], v1 offset:65535
 
 // GFX90A: ds_read_b96 a[252:254], v1 offset:65535 ; encoding: [0xff,0xff,0xfc,0xdb,0x01,0x00,0x00,0xfc]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b96 a[252:254], v1 offset:65535
 
 // GFX90A: ds_read_b96 a[6:8], v255 offset:65535 ; encoding: [0xff,0xff,0xfc,0xdb,0xff,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b96 a[6:8], v255 offset:65535
 
 // GFX90A: ds_read_b96 a[6:8], v1          ; encoding: [0x00,0x00,0xfc,0xdb,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b96 a[6:8], v1
 
 // GFX90A: ds_read_b96 a[6:8], v1          ; encoding: [0x00,0x00,0xfc,0xdb,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b96 a[6:8], v1
 
 // GFX90A: ds_read_b96 a[6:8], v1 offset:4 ; encoding: [0x04,0x00,0xfc,0xdb,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b96 a[6:8], v1 offset:4
 
 // GFX90A: ds_read_b128 a[6:9], v1 offset:65535 ; encoding: [0xff,0xff,0xfe,0xdb,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b128 a[6:9], v1 offset:65535
 
 // GFX90A: ds_read_b128 a[252:255], v1 offset:65535 ; encoding: [0xff,0xff,0xfe,0xdb,0x01,0x00,0x00,0xfc]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b128 a[252:255], v1 offset:65535
 
 // GFX90A: ds_read_b128 a[6:9], v255 offset:65535 ; encoding: [0xff,0xff,0xfe,0xdb,0xff,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b128 a[6:9], v255 offset:65535
 
 // GFX90A: ds_read_b128 a[6:9], v1         ; encoding: [0x00,0x00,0xfe,0xdb,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b128 a[6:9], v1
 
 // GFX90A: ds_read_b128 a[6:9], v1         ; encoding: [0x00,0x00,0xfe,0xdb,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b128 a[6:9], v1
 
 // GFX90A: ds_read_b128 a[6:9], v1 offset:4 ; encoding: [0x04,0x00,0xfe,0xdb,0x01,0x00,0x00,0x06]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 ds_read_b128 a[6:9], v1 offset:4
 
 // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x1 ; encoding: [0x00,0x01,0x01,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a5, v[2:5], s[8:15] dmask:0x1
 
 // GFX90A: image_load a252, v[2:5], s[8:15] dmask:0x1 ; encoding: [0x00,0x01,0x01,0xf0,0x02,0xfc,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a252, v[2:5], s[8:15] dmask:0x1
 
 // GFX90A: image_load a5, v[252:255], s[8:15] dmask:0x1 ; encoding: [0x00,0x01,0x01,0xf0,0xfc,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a5, v[252:255], s[8:15] dmask:0x1
 
 // GFX90A: image_load a5, v[2:5], s[12:19] dmask:0x1 ; encoding: [0x00,0x01,0x01,0xf0,0x02,0x05,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a5, v[2:5], s[12:19] dmask:0x1
 
 // GFX90A: image_load a5, v[2:5], s[92:99] dmask:0x1 ; encoding: [0x00,0x01,0x01,0xf0,0x02,0x05,0x17,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a5, v[2:5], s[92:99] dmask:0x1
 
 // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x2 ; encoding: [0x00,0x02,0x01,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a5, v[2:5], s[8:15] dmask:0x2
 
 // GFX90A: image_load a[6:7], v[2:5], s[8:15] dmask:0x3 ; encoding: [0x00,0x03,0x01,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a[6:7], v[2:5], s[8:15] dmask:0x3
 
 // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x4 ; encoding: [0x00,0x04,0x01,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a5, v[2:5], s[8:15] dmask:0x4
 
 // GFX90A: image_load a[6:7], v[2:5], s[8:15] dmask:0x5 ; encoding: [0x00,0x05,0x01,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a[6:7], v[2:5], s[8:15] dmask:0x5
 
 // GFX90A: image_load a[6:7], v[2:5], s[8:15] dmask:0x6 ; encoding: [0x00,0x06,0x01,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a[6:7], v[2:5], s[8:15] dmask:0x6
 
 // GFX90A: image_load a[6:8], v[2:5], s[8:15] dmask:0x7 ; encoding: [0x00,0x07,0x01,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a[6:8], v[2:5], s[8:15] dmask:0x7
 
 // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x8 ; encoding: [0x00,0x08,0x01,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a5, v[2:5], s[8:15] dmask:0x8
 
 // GFX90A: image_load a[6:7], v[2:5], s[8:15] dmask:0x9 ; encoding: [0x00,0x09,0x01,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a[6:7], v[2:5], s[8:15] dmask:0x9
 
 // GFX90A: image_load a[6:7], v[2:5], s[8:15] dmask:0xa ; encoding: [0x00,0x0a,0x01,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a[6:7], v[2:5], s[8:15] dmask:0xa
 
 // GFX90A: image_load a[6:8], v[2:5], s[8:15] dmask:0xb ; encoding: [0x00,0x0b,0x01,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a[6:8], v[2:5], s[8:15] dmask:0xb
 
 // GFX90A: image_load a[6:7], v[2:5], s[8:15] dmask:0xc ; encoding: [0x00,0x0c,0x01,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a[6:7], v[2:5], s[8:15] dmask:0xc
 
 // GFX90A: image_load a[6:8], v[2:5], s[8:15] dmask:0xd ; encoding: [0x00,0x0d,0x01,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a[6:8], v[2:5], s[8:15] dmask:0xd
 
 // GFX90A: image_load a[6:8], v[2:5], s[8:15] dmask:0xe ; encoding: [0x00,0x0e,0x01,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a[6:8], v[2:5], s[8:15] dmask:0xe
 
 // GFX90A: image_load a5, v[2:5], s[8:15]  ; encoding: [0x00,0x00,0x01,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a5, v[2:5], s[8:15]
 
 // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x01,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a5, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x1 glc ; encoding: [0x00,0x21,0x01,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a5, v[2:5], s[8:15] dmask:0x1 glc
 
 // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x1 slc ; encoding: [0x00,0x01,0x01,0xf2,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a5, v[2:5], s[8:15] dmask:0x1 slc
 
 // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x1 lwe ; encoding: [0x00,0x01,0x03,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a5, v[2:5], s[8:15] dmask:0x1 lwe
 
 // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x1 da ; encoding: [0x00,0x41,0x01,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a5, v[2:5], s[8:15] dmask:0x1 da
 
 // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x01,0xf0,0x02,0x05,0x02,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_load a5, v[2:5], s[8:15] dmask:0x1 d16
 
 // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x21,0xf0,0x02,0x01,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a1, v[2:5], s[12:19] dmask:0x1 unorm
 
 // GFX90A: image_store a252, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x21,0xf0,0x02,0xfc,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a252, v[2:5], s[12:19] dmask:0x1 unorm
 
 // GFX90A: image_store a1, v[252:255], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x21,0xf0,0xfc,0x01,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a1, v[252:255], s[12:19] dmask:0x1 unorm
 
 // GFX90A: image_store a1, v[2:5], s[16:23] dmask:0x1 unorm ; encoding: [0x00,0x11,0x21,0xf0,0x02,0x01,0x04,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a1, v[2:5], s[16:23] dmask:0x1 unorm
 
 // GFX90A: image_store a1, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x21,0xf0,0x02,0x01,0x17,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a1, v[2:5], s[92:99] dmask:0x1 unorm
 
 // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x2 unorm ; encoding: [0x00,0x12,0x21,0xf0,0x02,0x01,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a1, v[2:5], s[12:19] dmask:0x2 unorm
 
 // GFX90A: image_store a[2:3], v[2:5], s[12:19] dmask:0x3 unorm ; encoding: [0x00,0x13,0x21,0xf0,0x02,0x02,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a[2:3], v[2:5], s[12:19] dmask:0x3 unorm
 
 // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x4 unorm ; encoding: [0x00,0x14,0x21,0xf0,0x02,0x01,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a1, v[2:5], s[12:19] dmask:0x4 unorm
 
 // GFX90A: image_store a[2:3], v[2:5], s[12:19] dmask:0x5 unorm ; encoding: [0x00,0x15,0x21,0xf0,0x02,0x02,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a[2:3], v[2:5], s[12:19] dmask:0x5 unorm
 
 // GFX90A: image_store a[2:3], v[2:5], s[12:19] dmask:0x6 unorm ; encoding: [0x00,0x16,0x21,0xf0,0x02,0x02,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a[2:3], v[2:5], s[12:19] dmask:0x6 unorm
 
 // GFX90A: image_store a[2:4], v[2:5], s[12:19] dmask:0x7 unorm ; encoding: [0x00,0x17,0x21,0xf0,0x02,0x02,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a[2:4], v[2:5], s[12:19] dmask:0x7 unorm
 
 // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x8 unorm ; encoding: [0x00,0x18,0x21,0xf0,0x02,0x01,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a1, v[2:5], s[12:19] dmask:0x8 unorm
 
 // GFX90A: image_store a[2:3], v[2:5], s[12:19] dmask:0x9 unorm ; encoding: [0x00,0x19,0x21,0xf0,0x02,0x02,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a[2:3], v[2:5], s[12:19] dmask:0x9 unorm
 
 // GFX90A: image_store a[2:3], v[2:5], s[12:19] dmask:0xa unorm ; encoding: [0x00,0x1a,0x21,0xf0,0x02,0x02,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a[2:3], v[2:5], s[12:19] dmask:0xa unorm
 
 // GFX90A: image_store a[2:4], v[2:5], s[12:19] dmask:0xb unorm ; encoding: [0x00,0x1b,0x21,0xf0,0x02,0x02,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a[2:4], v[2:5], s[12:19] dmask:0xb unorm
 
 // GFX90A: image_store a[2:3], v[2:5], s[12:19] dmask:0xc unorm ; encoding: [0x00,0x1c,0x21,0xf0,0x02,0x02,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a[2:3], v[2:5], s[12:19] dmask:0xc unorm
 
 // GFX90A: image_store a[2:4], v[2:5], s[12:19] dmask:0xd unorm ; encoding: [0x00,0x1d,0x21,0xf0,0x02,0x02,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a[2:4], v[2:5], s[12:19] dmask:0xd unorm
 
 // GFX90A: image_store a[2:4], v[2:5], s[12:19] dmask:0xe unorm ; encoding: [0x00,0x1e,0x21,0xf0,0x02,0x02,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a[2:4], v[2:5], s[12:19] dmask:0xe unorm
 
 // GFX90A: image_store a[2:5], v[2:5], s[12:19] dmask:0xf unorm ; encoding: [0x00,0x1f,0x21,0xf0,0x02,0x02,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a[2:5], v[2:5], s[12:19] dmask:0xf unorm
 
 // GFX90A: image_store a1, v[2:5], s[12:19] unorm ; encoding: [0x00,0x10,0x21,0xf0,0x02,0x01,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a1, v[2:5], s[12:19] unorm
 
 // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x21,0xf0,0x02,0x01,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a1, v[2:5], s[12:19] dmask:0x1 unorm glc
 
 // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x21,0xf2,0x02,0x01,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a1, v[2:5], s[12:19] dmask:0x1 unorm slc
 
 // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x23,0xf0,0x02,0x01,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a1, v[2:5], s[12:19] dmask:0x1 unorm lwe
 
 // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x21,0xf0,0x02,0x01,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a1, v[2:5], s[12:19] dmask:0x1 unorm da
 
 // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x1 unorm d16 ; encoding: [0x00,0x11,0x21,0xf0,0x02,0x01,0x03,0x80]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_store a1, v[2:5], s[12:19] dmask:0x1 unorm d16
 
 // GFX90A: image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x41,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_swap a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x41,0xf0,0x02,0xfc,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_swap a252, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_swap a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x41,0xf0,0xfc,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_swap a5, v[252:255], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_swap a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x41,0xf0,0x02,0x05,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_swap a5, v[2:5], s[12:19] dmask:0x1 unorm
 
 // GFX90A: image_atomic_swap a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x41,0xf0,0x02,0x05,0x17,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_swap a5, v[2:5], s[92:99] dmask:0x1 unorm
 
 // GFX90A: image_atomic_swap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x41,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_swap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm
 
 // GFX90A: image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x41,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm glc
 
 // GFX90A: image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x41,0xf2,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm slc
 
 // GFX90A: image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x43,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm lwe
 
 // GFX90A: image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x41,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm da
 
 // GFX90A: image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x45,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm
 
 // GFX90A: image_atomic_cmpswap a[252:253], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x45,0xf0,0x02,0xfc,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_cmpswap a[252:253], v[2:5], s[8:15] dmask:0x3 unorm
 
 // GFX90A: image_atomic_cmpswap a[6:7], v[252:255], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x45,0xf0,0xfc,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_cmpswap a[6:7], v[252:255], s[8:15] dmask:0x3 unorm
 
 // GFX90A: image_atomic_cmpswap a[6:7], v[2:5], s[12:19] dmask:0x3 unorm ; encoding: [0x00,0x13,0x45,0xf0,0x02,0x06,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_cmpswap a[6:7], v[2:5], s[12:19] dmask:0x3 unorm
 
 // GFX90A: image_atomic_cmpswap a[6:7], v[2:5], s[92:99] dmask:0x3 unorm ; encoding: [0x00,0x13,0x45,0xf0,0x02,0x06,0x17,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_cmpswap a[6:7], v[2:5], s[92:99] dmask:0x3 unorm
 
 // GFX90A: image_atomic_cmpswap a[6:9], v[2:5], s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x45,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_cmpswap a[6:9], v[2:5], s[8:15] dmask:0xf unorm
 
 // GFX90A: image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm glc ; encoding: [0x00,0x33,0x45,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm glc
 
 // GFX90A: image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm slc ; encoding: [0x00,0x13,0x45,0xf2,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm slc
 
 // GFX90A: image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm lwe ; encoding: [0x00,0x13,0x47,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm lwe
 
 // GFX90A: image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm da ; encoding: [0x00,0x53,0x45,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm da
 
 // GFX90A: image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x49,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_add a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x49,0xf0,0x02,0xfc,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_add a252, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_add a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x49,0xf0,0xfc,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_add a5, v[252:255], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_add a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x49,0xf0,0x02,0x05,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_add a5, v[2:5], s[12:19] dmask:0x1 unorm
 
 // GFX90A: image_atomic_add a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x49,0xf0,0x02,0x05,0x17,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_add a5, v[2:5], s[92:99] dmask:0x1 unorm
 
 // GFX90A: image_atomic_add a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x49,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_add a[6:7], v[2:5], s[8:15] dmask:0x3 unorm
 
 // GFX90A: image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x49,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm glc
 
 // GFX90A: image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x49,0xf2,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm slc
 
 // GFX90A: image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x4b,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm lwe
 
 // GFX90A: image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x49,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm da
 
 // GFX90A: image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x4d,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_sub a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x4d,0xf0,0x02,0xfc,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_sub a252, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_sub a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x4d,0xf0,0xfc,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_sub a5, v[252:255], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_sub a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x4d,0xf0,0x02,0x05,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_sub a5, v[2:5], s[12:19] dmask:0x1 unorm
 
 // GFX90A: image_atomic_sub a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x4d,0xf0,0x02,0x05,0x17,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_sub a5, v[2:5], s[92:99] dmask:0x1 unorm
 
 // GFX90A: image_atomic_sub a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x4d,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_sub a[6:7], v[2:5], s[8:15] dmask:0x3 unorm
 
 // GFX90A: image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x4d,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm glc
 
 // GFX90A: image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x4d,0xf2,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm slc
 
 // GFX90A: image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x4f,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm lwe
 
 // GFX90A: image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x4d,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm da
 
 // GFX90A: image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x51,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_smin a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x51,0xf0,0x02,0xfc,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smin a252, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_smin a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x51,0xf0,0xfc,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smin a5, v[252:255], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_smin a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x51,0xf0,0x02,0x05,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smin a5, v[2:5], s[12:19] dmask:0x1 unorm
 
 // GFX90A: image_atomic_smin a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x51,0xf0,0x02,0x05,0x17,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smin a5, v[2:5], s[92:99] dmask:0x1 unorm
 
 // GFX90A: image_atomic_smin a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x51,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smin a[6:7], v[2:5], s[8:15] dmask:0x3 unorm
 
 // GFX90A: image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x51,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm glc
 
 // GFX90A: image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x51,0xf2,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm slc
 
 // GFX90A: image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x53,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm lwe
 
 // GFX90A: image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x51,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm da
 
 // GFX90A: image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x55,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_umin a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x55,0xf0,0x02,0xfc,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umin a252, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_umin a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x55,0xf0,0xfc,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umin a5, v[252:255], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_umin a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x55,0xf0,0x02,0x05,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umin a5, v[2:5], s[12:19] dmask:0x1 unorm
 
 // GFX90A: image_atomic_umin a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x55,0xf0,0x02,0x05,0x17,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umin a5, v[2:5], s[92:99] dmask:0x1 unorm
 
 // GFX90A: image_atomic_umin a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x55,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umin a[6:7], v[2:5], s[8:15] dmask:0x3 unorm
 
 // GFX90A: image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x55,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm glc
 
 // GFX90A: image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x55,0xf2,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm slc
 
 // GFX90A: image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x57,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm lwe
 
 // GFX90A: image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x55,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm da
 
 // GFX90A: image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x59,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_smax a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x59,0xf0,0x02,0xfc,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smax a252, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_smax a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x59,0xf0,0xfc,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smax a5, v[252:255], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_smax a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x59,0xf0,0x02,0x05,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smax a5, v[2:5], s[12:19] dmask:0x1 unorm
 
 // GFX90A: image_atomic_smax a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x59,0xf0,0x02,0x05,0x17,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smax a5, v[2:5], s[92:99] dmask:0x1 unorm
 
 // GFX90A: image_atomic_smax a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x59,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smax a[6:7], v[2:5], s[8:15] dmask:0x3 unorm
 
 // GFX90A: image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x59,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm glc
 
 // GFX90A: image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x59,0xf2,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm slc
 
 // GFX90A: image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x5b,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm lwe
 
 // GFX90A: image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x59,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm da
 
 // GFX90A: image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x5d,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_umax a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x5d,0xf0,0x02,0xfc,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umax a252, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_umax a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x5d,0xf0,0xfc,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umax a5, v[252:255], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_umax a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x5d,0xf0,0x02,0x05,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umax a5, v[2:5], s[12:19] dmask:0x1 unorm
 
 // GFX90A: image_atomic_umax a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x5d,0xf0,0x02,0x05,0x17,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umax a5, v[2:5], s[92:99] dmask:0x1 unorm
 
 // GFX90A: image_atomic_umax a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x5d,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umax a[6:7], v[2:5], s[8:15] dmask:0x3 unorm
 
 // GFX90A: image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x5d,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm glc
 
 // GFX90A: image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x5d,0xf2,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm slc
 
 // GFX90A: image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x5f,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm lwe
 
 // GFX90A: image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x5d,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm da
 
 // GFX90A: image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x61,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_and a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x61,0xf0,0x02,0xfc,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_and a252, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_and a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x61,0xf0,0xfc,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_and a5, v[252:255], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_and a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x61,0xf0,0x02,0x05,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_and a5, v[2:5], s[12:19] dmask:0x1 unorm
 
 // GFX90A: image_atomic_and a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x61,0xf0,0x02,0x05,0x17,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_and a5, v[2:5], s[92:99] dmask:0x1 unorm
 
 // GFX90A: image_atomic_and a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x61,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_and a[6:7], v[2:5], s[8:15] dmask:0x3 unorm
 
 // GFX90A: image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x61,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm glc
 
 // GFX90A: image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x61,0xf2,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm slc
 
 // GFX90A: image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x63,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm lwe
 
 // GFX90A: image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x61,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm da
 
 // GFX90A: image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x65,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_or a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x65,0xf0,0x02,0xfc,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_or a252, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_or a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x65,0xf0,0xfc,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_or a5, v[252:255], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_or a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x65,0xf0,0x02,0x05,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_or a5, v[2:5], s[12:19] dmask:0x1 unorm
 
 // GFX90A: image_atomic_or a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x65,0xf0,0x02,0x05,0x17,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_or a5, v[2:5], s[92:99] dmask:0x1 unorm
 
 // GFX90A: image_atomic_or a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x65,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_or a[6:7], v[2:5], s[8:15] dmask:0x3 unorm
 
 // GFX90A: image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x65,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm glc
 
 // GFX90A: image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x65,0xf2,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm slc
 
 // GFX90A: image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x67,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm lwe
 
 // GFX90A: image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x65,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm da
 
 // GFX90A: image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x69,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_xor a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x69,0xf0,0x02,0xfc,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_xor a252, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_xor a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x69,0xf0,0xfc,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_xor a5, v[252:255], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_xor a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x69,0xf0,0x02,0x05,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_xor a5, v[2:5], s[12:19] dmask:0x1 unorm
 
 // GFX90A: image_atomic_xor a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x69,0xf0,0x02,0x05,0x17,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_xor a5, v[2:5], s[92:99] dmask:0x1 unorm
 
 // GFX90A: image_atomic_xor a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x69,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_xor a[6:7], v[2:5], s[8:15] dmask:0x3 unorm
 
 // GFX90A: image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x69,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm glc
 
 // GFX90A: image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x69,0xf2,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm slc
 
 // GFX90A: image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x6b,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm lwe
 
 // GFX90A: image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x69,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm da
 
 // GFX90A: image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x6d,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_inc a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x6d,0xf0,0x02,0xfc,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_inc a252, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_inc a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x6d,0xf0,0xfc,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_inc a5, v[252:255], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_inc a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x6d,0xf0,0x02,0x05,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_inc a5, v[2:5], s[12:19] dmask:0x1 unorm
 
 // GFX90A: image_atomic_inc a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x6d,0xf0,0x02,0x05,0x17,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_inc a5, v[2:5], s[92:99] dmask:0x1 unorm
 
 // GFX90A: image_atomic_inc a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x6d,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_inc a[6:7], v[2:5], s[8:15] dmask:0x3 unorm
 
 // GFX90A: image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x6d,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm glc
 
 // GFX90A: image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x6d,0xf2,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm slc
 
 // GFX90A: image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x6f,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm lwe
 
 // GFX90A: image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x6d,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm da
 
 // GFX90A: image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x71,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_dec a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x71,0xf0,0x02,0xfc,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_dec a252, v[2:5], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_dec a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x71,0xf0,0xfc,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_dec a5, v[252:255], s[8:15] dmask:0x1 unorm
 
 // GFX90A: image_atomic_dec a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x71,0xf0,0x02,0x05,0x03,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_dec a5, v[2:5], s[12:19] dmask:0x1 unorm
 
 // GFX90A: image_atomic_dec a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x71,0xf0,0x02,0x05,0x17,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_dec a5, v[2:5], s[92:99] dmask:0x1 unorm
 
 // GFX90A: image_atomic_dec a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x71,0xf0,0x02,0x06,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_dec a[6:7], v[2:5], s[8:15] dmask:0x3 unorm
 
 // GFX90A: image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x71,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm glc
 
 // GFX90A: image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x71,0xf2,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm slc
 
 // GFX90A: image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x73,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm lwe
 
 // GFX90A: image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x71,0xf0,0x02,0x05,0x02,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm da
 
 // GFX90A: image_sample a5, v[0:3], s[8:15], s[12:15] dmask:0x1 ; encoding: [0x00,0x01,0x81,0xf0,0x00,0x05,0x62,0x00]
-// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU
+// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 image_sample a5, v[0:3], s[8:15], s[12:15] dmask:0x1
diff --git a/llvm/test/MC/AMDGPU/gfx950-unsupported.s b/llvm/test/MC/AMDGPU/gfx950-unsupported.s
index 8bdab2d..cea81b2 100644
--- a/llvm/test/MC/AMDGPU/gfx950-unsupported.s
+++ b/llvm/test/MC/AMDGPU/gfx950-unsupported.s
@@ -183,7 +183,7 @@ v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], a[0:3], v[4:7]
 // ds_read_b64_tr_b4
 //===----------------------------------------------------------------------===//
 ds_read_b64_tr_b4 v[1:2], v0
-// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
 
 ds_read_b64_tr_b4 v1, v0
@@ -202,7 +202,7 @@ ds_read_b64_tr_b4 v[2:3], v2 offset:-64
 //ds_read_b64_tr_b8
 //===----------------------------------------------------------------------===//
 ds_read_b64_tr_b8 v[1:2], v0
-// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
 
 ds_read_b64_tr_b8 v1, v0
@@ -221,7 +221,7 @@ ds_read_b64_tr_b8 v[2:3], v2 offset:-64
 // ds_read_b64_tr_b16
 //===----------------------------------------------------------------------===//
 ds_read_b64_tr_b16 v[1:2], v0
-// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
+// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
 
 ds_read_b64_tr_b16 v1, v0
diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s
index 7839475..78aa8f2 100644
--- a/llvm/test/MC/AMDGPU/literals.s
+++ b/llvm/test/MC/AMDGPU/literals.s
@@ -1,935 +1,1849 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=SICI,CI
-// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GFX89
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX89,GFX9
-
-// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSI,NOSICI,NOSICIVI --implicit-check-not=error:
-// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOCIVI,NOSICIVI --implicit-check-not=error:
-// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICIVI,NOVI,NOGFX89 --implicit-check-not=error:
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX89,NOGFX9 --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX89
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX89,GFX9
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX11
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX12XX,GFX12
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX12XX,GFX1250
+
+// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOSI --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOCI --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX8PLUS,NOGFX89,NOVI --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX8PLUS,NOGFX89,NOGFX9 --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX8PLUS,NOGFX11 --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX8PLUS,NOGFX12 --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 %s -mattr=+real-true16 -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX8PLUS,NOGFX1250 --implicit-check-not=error:
 
 //---------------------------------------------------------------------------//
 // fp literal, expected fp operand
 //---------------------------------------------------------------------------//
 
-// SICI: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x64,0x00,0x7e]
 v_fract_f64 v[0:1], 0.5
+// SICI: v_fract_f64_e32 v[0:1], 0.5             ; encoding: [0xf0,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], 0.5             ; encoding: [0xf0,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0.5             ; encoding: [0xf0,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], 0.5             ; encoding: [0xf0,0x7c,0x00,0x7e]
 
-// SICI: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x68,0x00,0x7e]
-// GFX89: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x50,0x00,0x7e]
 v_sqrt_f64 v[0:1], -4.0
+// SICI: v_sqrt_f64_e32 v[0:1], -4.0             ; encoding: [0xf7,0x68,0x00,0x7e]
+// GFX89: v_sqrt_f64_e32 v[0:1], -4.0             ; encoding: [0xf7,0x50,0x00,0x7e]
+// GFX12XX: v_sqrt_f64_e32 v[0:1], -4.0             ; encoding: [0xf7,0x68,0x00,0x7e]
+// GFX11: v_sqrt_f64_e32 v[0:1], -4.0             ; encoding: [0xf7,0x68,0x00,0x7e]
 
-// SICI: v_log_clamp_f32_e32 v1, 0.5 ; encoding: [0xf0,0x4c,0x02,0x7e]
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
 v_log_clamp_f32 v1, 0.5
+// NOGFX8PLUS: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// SICI: v_log_clamp_f32_e32 v1, 0.5             ; encoding: [0xf0,0x4c,0x02,0x7e]
 
-// SICI: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x64,0x00,0x7e]
-v_fract_f64 v[0:1], 0.5
-
-// SICI: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x38,0x00,0x7e]
 v_trunc_f32 v0, 0.5
+// SICI: v_trunc_f32_e32 v0, 0.5                 ; encoding: [0xf0,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, 0.5                 ; encoding: [0xf0,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 0.5                 ; encoding: [0xf0,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 0.5                 ; encoding: [0xf0,0x42,0x00,0x7e]
 
-// SICI: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x64,0x00,0x7e]
 v_fract_f64 v[0:1], -1.0
+// SICI: v_fract_f64_e32 v[0:1], -1.0            ; encoding: [0xf3,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], -1.0            ; encoding: [0xf3,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], -1.0            ; encoding: [0xf3,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], -1.0            ; encoding: [0xf3,0x7c,0x00,0x7e]
 
-// SICI: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x38,0x00,0x7e]
 v_trunc_f32 v0, -1.0
+// SICI: v_trunc_f32_e32 v0, -1.0                ; encoding: [0xf3,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, -1.0                ; encoding: [0xf3,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, -1.0                ; encoding: [0xf3,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, -1.0                ; encoding: [0xf3,0x42,0x00,0x7e]
 
-// SICI: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x64,0x00,0x7e]
 v_fract_f64 v[0:1], 4.0
+// SICI: v_fract_f64_e32 v[0:1], 4.0             ; encoding: [0xf6,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], 4.0             ; encoding: [0xf6,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 4.0             ; encoding: [0xf6,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], 4.0             ; encoding: [0xf6,0x7c,0x00,0x7e]
 
-// SICI: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x38,0x00,0x7e]
 v_trunc_f32 v0, 4.0
+// SICI: v_trunc_f32_e32 v0, 4.0                 ; encoding: [0xf6,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, 4.0                 ; encoding: [0xf6,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 4.0                 ; encoding: [0xf6,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 4.0                 ; encoding: [0xf6,0x42,0x00,0x7e]
 
-// SICI: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x64,0x00,0x7e]
 v_fract_f64 v[0:1], 0.0
+// SICI: v_fract_f64_e32 v[0:1], 0               ; encoding: [0x80,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], 0               ; encoding: [0x80,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0               ; encoding: [0x80,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], 0               ; encoding: [0x80,0x7c,0x00,0x7e]
 
-// SICI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x38,0x00,0x7e]
 v_trunc_f32 v0, 0.0
+// SICI: v_trunc_f32_e32 v0, 0                   ; encoding: [0x80,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, 0                   ; encoding: [0x80,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 0                   ; encoding: [0x80,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 0                   ; encoding: [0x80,0x42,0x00,0x7e]
 
-// SICI: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f]
-// GFX89: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x64,0x00,0x7e,0x00,0x00,0xf8,0x3f]
 v_fract_f64 v[0:1], 1.5
+// SICI: v_fract_f64_e32 v[0:1], 0x3ff80000      ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f]
+// GFX89: v_fract_f64_e32 v[0:1], 0x3ff80000      ; encoding: [0xff,0x64,0x00,0x7e,0x00,0x00,0xf8,0x3f]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0x3ff80000      ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f]
+// GFX11: v_fract_f64_e32 v[0:1], 0x3ff80000      ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f]
 
-// SICI: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f]
-// GFX89: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x00,0xc0,0x3f]
 v_trunc_f32 v0, 1.5
+// SICI: v_trunc_f32_e32 v0, 0x3fc00000          ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f]
+// GFX89: v_trunc_f32_e32 v0, 0x3fc00000          ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x00,0xc0,0x3f]
+// GFX12XX: v_trunc_f32_e32 v0, 0x3fc00000          ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f]
+// GFX11: v_trunc_f32_e32 v0, 0x3fc00000          ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f]
 
-// SICI: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0]
-// GFX89: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x64,0x00,0x7e,0xca,0x21,0x09,0xc0]
 v_fract_f64 v[0:1], -3.1415
+// SICI: v_fract_f64_e32 v[0:1], 0xc00921ca      ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0]
+// GFX89: v_fract_f64_e32 v[0:1], 0xc00921ca      ; encoding: [0xff,0x64,0x00,0x7e,0xca,0x21,0x09,0xc0]
+// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// GFX11: v_fract_f64_e32 v[0:1], 0xc00921ca      ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0]
+// GFX12: v_fract_f64_e32 v[0:1], 0xc00921ca      ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0]
+// GFX1250: v_fract_f64_e32 v[0:1], 0xc00921cac083126f ; encoding: [0xfe,0x7c,0x00,0x7e,0x6f,0x12,0x83,0xc0,0xca,0x21,0x09,0xc0]
+// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
 
-// SICI: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0]
-// GFX89: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x38,0x00,0x7e,0x56,0x0e,0x49,0xc0]
 v_trunc_f32 v0, -3.1415
+// SICI: v_trunc_f32_e32 v0, 0xc0490e56          ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0]
+// GFX89: v_trunc_f32_e32 v0, 0xc0490e56          ; encoding: [0xff,0x38,0x00,0x7e,0x56,0x0e,0x49,0xc0]
+// GFX12XX: v_trunc_f32_e32 v0, 0xc0490e56          ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0]
+// GFX11: v_trunc_f32_e32 v0, 0xc0490e56          ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0]
 
-// SICI: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44]
-// GFX89: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x64,0x00,0x7e,0x02,0x2d,0xb5,0x44]
 v_fract_f64 v[0:1], 100000000000000000000000.0
+// SICI: v_fract_f64_e32 v[0:1], 0x44b52d02      ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44]
+// GFX89: v_fract_f64_e32 v[0:1], 0x44b52d02      ; encoding: [0xff,0x64,0x00,0x7e,0x02,0x2d,0xb5,0x44]
+// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// GFX11: v_fract_f64_e32 v[0:1], 0x44b52d02      ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44]
+// GFX12: v_fract_f64_e32 v[0:1], 0x44b52d02      ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44]
+// GFX1250: v_fract_f64_e32 v[0:1], 0x44b52d02c7e14af6 ; encoding: [0xfe,0x7c,0x00,0x7e,0xf6,0x4a,0xe1,0xc7,0x02,0x2d,0xb5,0x44]
+// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
 
-// SICI: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65]
-// GFX89: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x38,0x00,0x7e,0x16,0x68,0xa9,0x65]
 v_trunc_f32 v0, 100000000000000000000000.0
+// SICI: v_trunc_f32_e32 v0, 0x65a96816          ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65]
+// GFX89: v_trunc_f32_e32 v0, 0x65a96816          ; encoding: [0xff,0x38,0x00,0x7e,0x16,0x68,0xa9,0x65]
+// GFX12XX: v_trunc_f32_e32 v0, 0x65a96816          ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65]
+// GFX11: v_trunc_f32_e32 v0, 0x65a96816          ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65]
 
-// SICI: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41]
-// GFX89: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x64,0x00,0x7e,0xd0,0x12,0x63,0x41]
 v_fract_f64 v[0:1], 10000000.0
+// SICI: v_fract_f64_e32 v[0:1], 0x416312d0      ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41]
+// GFX89: v_fract_f64_e32 v[0:1], 0x416312d0      ; encoding: [0xff,0x64,0x00,0x7e,0xd0,0x12,0x63,0x41]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0x416312d0      ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41]
+// GFX11: v_fract_f64_e32 v[0:1], 0x416312d0      ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41]
 
-// SICI: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b]
-// GFX89: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x38,0x00,0x7e,0x80,0x96,0x18,0x4b]
 v_trunc_f32 v0, 10000000.0
+// SICI: v_trunc_f32_e32 v0, 0x4b189680          ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b]
+// GFX89: v_trunc_f32_e32 v0, 0x4b189680          ; encoding: [0xff,0x38,0x00,0x7e,0x80,0x96,0x18,0x4b]
+// GFX12XX: v_trunc_f32_e32 v0, 0x4b189680          ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b]
+// GFX11: v_trunc_f32_e32 v0, 0x4b189680          ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b]
 
-// SICI: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47]
-// GFX89: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xef,0x47]
 v_fract_f64 v[0:1], 3.402823e+38
+// SICI: v_fract_f64_e32 v[0:1], 0x47efffff      ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47]
+// GFX89: v_fract_f64_e32 v[0:1], 0x47efffff      ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xef,0x47]
+// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// GFX11: v_fract_f64_e32 v[0:1], 0x47efffff      ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47]
+// GFX12: v_fract_f64_e32 v[0:1], 0x47efffff      ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47]
+// GFX1250: v_fract_f64_e32 v[0:1], 0x47efffff966ad924 ; encoding: [0xfe,0x7c,0x00,0x7e,0x24,0xd9,0x6a,0x96,0xff,0xff,0xef,0x47]
+// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
 
-// SICI: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f]
-// GFX89: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x38,0x00,0x7e,0xfd,0xff,0x7f,0x7f]
 v_trunc_f32 v0, 3.402823e+38
+// SICI: v_trunc_f32_e32 v0, 0x7f7ffffd          ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f]
+// GFX89: v_trunc_f32_e32 v0, 0x7f7ffffd          ; encoding: [0xff,0x38,0x00,0x7e,0xfd,0xff,0x7f,0x7f]
+// GFX12XX: v_trunc_f32_e32 v0, 0x7f7ffffd          ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f]
+// GFX11: v_trunc_f32_e32 v0, 0x7f7ffffd          ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f]
 
-// SICI: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38]
-// GFX89: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0x1f,0x38]
 v_fract_f64 v[0:1], 2.3509886e-38
+// SICI: v_fract_f64_e32 v[0:1], 0x381fffff      ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38]
+// GFX89: v_fract_f64_e32 v[0:1], 0x381fffff      ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0x1f,0x38]
+// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// GFX11: v_fract_f64_e32 v[0:1], 0x381fffff      ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38]
+// GFX12: v_fract_f64_e32 v[0:1], 0x381fffff      ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38]
+// GFX1250: v_fract_f64_e32 v[0:1], 0x381fffffe8c9d9fb ; encoding: [0xfe,0x7c,0x00,0x7e,0xfb,0xd9,0xc9,0xe8,0xff,0xff,0x1f,0x38]
+// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
 
-// SICI: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00]
-// GFX89: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x38,0x00,0x7e,0xff,0xff,0xff,0x00]
 v_trunc_f32 v0, 2.3509886e-38
+// SICI: v_trunc_f32_e32 v0, 0xffffff            ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00]
+// GFX89: v_trunc_f32_e32 v0, 0xffffff            ; encoding: [0xff,0x38,0x00,0x7e,0xff,0xff,0xff,0x00]
+// GFX12XX: v_trunc_f32_e32 v0, 0xffffff            ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00]
+// GFX11: v_trunc_f32_e32 v0, 0xffffff            ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00]
 
-// SICI: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31]
-// GFX89: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x64,0x00,0x7e,0x23,0xf6,0x79,0x31]
 v_fract_f64 v[0:1], 2.3509886e-70
+// SICI: v_fract_f64_e32 v[0:1], 0x3179f623      ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31]
+// GFX89: v_fract_f64_e32 v[0:1], 0x3179f623      ; encoding: [0xff,0x64,0x00,0x7e,0x23,0xf6,0x79,0x31]
+// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// GFX11: v_fract_f64_e32 v[0:1], 0x3179f623      ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31]
+// GFX12: v_fract_f64_e32 v[0:1], 0x3179f623      ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31]
+// GFX1250: v_fract_f64_e32 v[0:1], 0x3179f623c2d3cf3c ; encoding: [0xfe,0x7c,0x00,0x7e,0x3c,0xcf,0xd3,0xc2,0x23,0xf6,0x79,0x31]
+// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_trunc_f32 v0, 2.3509886e-70
+// NOGCN: :[[@LINE-1]]:17: error: invalid operand for instruction
+
+v_fract_f64_e32 v[0:1], 1.0
+// SICI: v_fract_f64_e32 v[0:1], 1.0             ; encoding: [0xf2,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], 1.0             ; encoding: [0xf2,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 1.0             ; encoding: [0xf2,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], 1.0             ; encoding: [0xf2,0x7c,0x00,0x7e]
+
+v_fract_f64_e32 v[0:1], lit(1.0)
+// SICI: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f]
+// GFX89: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x64,0x00,0x7e,0x00,0x00,0xf0,0x3f]
+// GFX11: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f]
+// GFX12: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f]
+// GFX1250: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xfe,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f,0x00,0x00,0x00,0x00]
+
+v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1.0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1.0 ; encoding: [0x08,0x40,0x44,0xcc,0x00,0x09,0xca,0x1b]
+// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], lit(1.0)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-3]]:54: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-4]]:54: error: invalid operand for instruction
+// NOGFX1250: :[[@LINE-5]]:54: error: invalid operand for instruction
+
+v_cos_f16_e32 v5.l, 1.0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+// GFX11: v_cos_f16_e32 v5.l, 1.0                 ; encoding: [0xf2,0xc2,0x0a,0x7e]
+// GFX1250: v_cos_f16_e32 v5.l, 1.0                 ; encoding: [0xf2,0xc2,0x0a,0x7e]
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_cos_f16_e32 v5.l, lit(1.0)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+// GFX11: v_cos_f16_e32 v5.l, lit(0x3c00)         ; encoding: [0xff,0xc2,0x0a,0x7e,0x00,0x3c,0x00,0x00]
+// GFX1250: v_cos_f16_e32 v5.l, lit(0x3c00)         ; encoding: [0xff,0xc2,0x0a,0x7e,0x00,0x3c,0x00,0x00]
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_tanh_bf16 v5, 1.0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_tanh_bf16_e32 v5, 1.0                 ; encoding: [0xf2,0x94,0x0a,0x7e]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_tanh_bf16 v5, lit(1.0)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_tanh_bf16_e32 v5, lit(0x3f80)         ; encoding: [0xff,0x94,0x0a,0x7e,0x80,0x3f,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_trunc_f32_e32 v0, 1.0
+// SICI: v_trunc_f32_e32 v0, 1.0                 ; encoding: [0xf2,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, 1.0                 ; encoding: [0xf2,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 1.0                 ; encoding: [0xf2,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 1.0                 ; encoding: [0xf2,0x42,0x00,0x7e]
+
+v_trunc_f32_e32 v0, lit(1.0)
+// SICI: v_trunc_f32_e32 v0, lit(0x3f800000)     ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0x80,0x3f]
+// GFX89: v_trunc_f32_e32 v0, lit(0x3f800000)     ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x00,0x80,0x3f]
+// GFX12XX: v_trunc_f32_e32 v0, lit(0x3f800000)     ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0x80,0x3f]
+// GFX11: v_trunc_f32_e32 v0, lit(0x3f800000)     ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0x80,0x3f]
+
+v_dot2_bf16_bf16 v5.l, v1, v2, 1.0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, 1.0      ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03]
+// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_dot2_bf16_bf16 v5.l, v1, v2, lit(1.0)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, lit(0x3f80) ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0x80,0x3f,0x00,0x00]
+// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_dot2_f32_f16 v5, v1, 1.0, v2
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_dot2_f32_f16 v5, v1, 1.0, v2          ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xe5,0x09,0x1c]
+// GFX12: v_dot2_f32_f16 v5, v1, 1.0, v2          ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xe5,0x09,0x1c]
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_dot2_f32_f16 v5, v1, lit(1.0), v2
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_dot2_f32_f16 v5, v1, lit(0x3c00), v2  ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xff,0x09,0x1c,0x00,0x3c,0x00,0x00]
+// GFX12: v_dot2_f32_f16 v5, v1, lit(0x3c00), v2  ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xff,0x09,0x1c,0x00,0x3c,0x00,0x00]
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_cvt_pk_fp8_f16 v1.l, 1.0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3c00           ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x3c,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_cvt_pk_fp8_f16 v1.l, lit(1.0)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_cvt_pk_fp8_f16 v1.l, lit(0x3c00)      ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x3c,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
 
 //---------------------------------------------------------------------------//
 // fp literal, expected int operand
 //---------------------------------------------------------------------------//
 
-// SICI: s_mov_b64 s[0:1], 0.5 ; encoding: [0xf0,0x04,0x80,0xbe]
-// GFX89: s_mov_b64 s[0:1], 0.5 ; encoding: [0xf0,0x01,0x80,0xbe]
 s_mov_b64_e32 s[0:1], 0.5
+// GFX8PLUS: s_mov_b64 s[0:1], 0.5                   ; encoding: [0xf0,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], 0.5                   ; encoding: [0xf0,0x04,0x80,0xbe]
+
+s_mov_b64 s[0:1], lit(0.5)
+// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-// SICI: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x26]
 v_and_b32_e32 v0, 0.5, v1
+// SICI: v_and_b32_e32 v0, 0.5, v1               ; encoding: [0xf0,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, 0.5, v1               ; encoding: [0xf0,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, 0.5, v1               ; encoding: [0xf0,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, 0.5, v1               ; encoding: [0xf0,0x02,0x00,0x36]
 
-// SICI: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf0,0x02,0x02,0x00]
-// GFX89: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf0,0x02,0x02,0x00]
 v_and_b32_e64 v0, 0.5, v1
+// SICI: v_and_b32_e64 v0, 0.5, v1               ; encoding: [0x00,0x00,0x36,0xd2,0xf0,0x02,0x02,0x00]
+// GFX89: v_and_b32_e64 v0, 0.5, v1               ; encoding: [0x00,0x00,0x13,0xd1,0xf0,0x02,0x02,0x00]
+// GFX12XX: v_and_b32_e64 v0, 0.5, v1               ; encoding: [0x00,0x00,0x1b,0xd5,0xf0,0x02,0x02,0x00]
+// GFX11: v_and_b32_e64 v0, 0.5, v1               ; encoding: [0x00,0x00,0x1b,0xd5,0xf0,0x02,0x02,0x00]
 
-// SICI: s_mov_b64 s[0:1], -1.0 ; encoding: [0xf3,0x04,0x80,0xbe]
-// GFX89: s_mov_b64 s[0:1], -1.0 ; encoding: [0xf3,0x01,0x80,0xbe]
 s_mov_b64_e32 s[0:1], -1.0
+// GFX8PLUS: s_mov_b64 s[0:1], -1.0                  ; encoding: [0xf3,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], -1.0                  ; encoding: [0xf3,0x04,0x80,0xbe]
 
-// SICI: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x26]
 v_and_b32_e32 v0, -1.0, v1
+// SICI: v_and_b32_e32 v0, -1.0, v1              ; encoding: [0xf3,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, -1.0, v1              ; encoding: [0xf3,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, -1.0, v1              ; encoding: [0xf3,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, -1.0, v1              ; encoding: [0xf3,0x02,0x00,0x36]
 
-// SICI: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf3,0x02,0x02,0x00]
-// GFX89: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf3,0x02,0x02,0x00]
 v_and_b32_e64 v0, -1.0, v1
+// SICI: v_and_b32_e64 v0, -1.0, v1              ; encoding: [0x00,0x00,0x36,0xd2,0xf3,0x02,0x02,0x00]
+// GFX89: v_and_b32_e64 v0, -1.0, v1              ; encoding: [0x00,0x00,0x13,0xd1,0xf3,0x02,0x02,0x00]
+// GFX12XX: v_and_b32_e64 v0, -1.0, v1              ; encoding: [0x00,0x00,0x1b,0xd5,0xf3,0x02,0x02,0x00]
+// GFX11: v_and_b32_e64 v0, -1.0, v1              ; encoding: [0x00,0x00,0x1b,0xd5,0xf3,0x02,0x02,0x00]
 
-// SICI: s_mov_b64 s[0:1], 4.0 ; encoding: [0xf6,0x04,0x80,0xbe]
-// GFX89: s_mov_b64 s[0:1], 4.0 ; encoding: [0xf6,0x01,0x80,0xbe]
 s_mov_b64_e32 s[0:1], 4.0
+// GFX8PLUS: s_mov_b64 s[0:1], 4.0                   ; encoding: [0xf6,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], 4.0                   ; encoding: [0xf6,0x04,0x80,0xbe]
 
-// SICI: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x26]
 v_and_b32_e32 v0, 4.0, v1
+// SICI: v_and_b32_e32 v0, 4.0, v1               ; encoding: [0xf6,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, 4.0, v1               ; encoding: [0xf6,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, 4.0, v1               ; encoding: [0xf6,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, 4.0, v1               ; encoding: [0xf6,0x02,0x00,0x36]
 
-// SICI: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf6,0x02,0x02,0x00]
-// GFX89: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf6,0x02,0x02,0x00]
 v_and_b32_e64 v0, 4.0, v1
+// SICI: v_and_b32_e64 v0, 4.0, v1               ; encoding: [0x00,0x00,0x36,0xd2,0xf6,0x02,0x02,0x00]
+// GFX89: v_and_b32_e64 v0, 4.0, v1               ; encoding: [0x00,0x00,0x13,0xd1,0xf6,0x02,0x02,0x00]
+// GFX12XX: v_and_b32_e64 v0, 4.0, v1               ; encoding: [0x00,0x00,0x1b,0xd5,0xf6,0x02,0x02,0x00]
+// GFX11: v_and_b32_e64 v0, 4.0, v1               ; encoding: [0x00,0x00,0x1b,0xd5,0xf6,0x02,0x02,0x00]
 
-// SICI: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x04,0x80,0xbe]
-// GFX89: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x01,0x80,0xbe]
 s_mov_b64_e32 s[0:1], 0.0
+// GFX8PLUS: s_mov_b64 s[0:1], 0                     ; encoding: [0x80,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], 0                     ; encoding: [0x80,0x04,0x80,0xbe]
 
-// SICI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x26]
 v_and_b32_e32 v0, 0.0, v1
+// SICI: v_and_b32_e32 v0, 0, v1                 ; encoding: [0x80,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, 0, v1                 ; encoding: [0x80,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, 0, v1                 ; encoding: [0x80,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, 0, v1                 ; encoding: [0x80,0x02,0x00,0x36]
 
-// SICI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00]
-// GFX89: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00]
 v_and_b32_e64 v0, 0.0, v1
+// SICI: v_and_b32_e64 v0, 0, v1                 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00]
+// GFX89: v_and_b32_e64 v0, 0, v1                 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00]
+// GFX12XX: v_and_b32_e64 v0, 0, v1                 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00]
+// GFX11: v_and_b32_e64 v0, 0, v1                 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00]
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 s_mov_b64_e32 s[0:1], 1.5
+// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-// SICI: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f]
-// GFX89: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x26,0x00,0x00,0xc0,0x3f]
 v_and_b32_e32 v0, 1.5, v1
+// SICI: v_and_b32_e32 v0, 0x3fc00000, v1        ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f]
+// GFX89: v_and_b32_e32 v0, 0x3fc00000, v1        ; encoding: [0xff,0x02,0x00,0x26,0x00,0x00,0xc0,0x3f]
+// GFX12XX: v_and_b32_e32 v0, 0x3fc00000, v1        ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f]
+// GFX11: v_and_b32_e32 v0, 0x3fc00000, v1        ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f]
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 s_mov_b64_e32 s[0:1], -3.1415
+// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-// SICI: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0]
-// GFX89: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x26,0x56,0x0e,0x49,0xc0]
 v_and_b32_e32 v0, -3.1415, v1
+// SICI: v_and_b32_e32 v0, 0xc0490e56, v1        ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0]
+// GFX89: v_and_b32_e32 v0, 0xc0490e56, v1        ; encoding: [0xff,0x02,0x00,0x26,0x56,0x0e,0x49,0xc0]
+// GFX12XX: v_and_b32_e32 v0, 0xc0490e56, v1        ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0]
+// GFX11: v_and_b32_e32 v0, 0xc0490e56, v1        ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0]
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 s_mov_b64_e32 s[0:1], 100000000000000000000000.0
+// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-// SICI: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65]
-// GFX89: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x26,0x16,0x68,0xa9,0x65]
 v_and_b32_e32 v0, 100000000000000000000000.0, v1
+// SICI: v_and_b32_e32 v0, 0x65a96816, v1        ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65]
+// GFX89: v_and_b32_e32 v0, 0x65a96816, v1        ; encoding: [0xff,0x02,0x00,0x26,0x16,0x68,0xa9,0x65]
+// GFX12XX: v_and_b32_e32 v0, 0x65a96816, v1        ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65]
+// GFX11: v_and_b32_e32 v0, 0x65a96816, v1        ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65]
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 s_mov_b64_e32 s[0:1], 10000000.0
+// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-// SICI: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b]
-// GFX89: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x26,0x80,0x96,0x18,0x4b]
 v_and_b32_e32 v0, 10000000.0, v1
+// SICI: v_and_b32_e32 v0, 0x4b189680, v1        ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b]
+// GFX89: v_and_b32_e32 v0, 0x4b189680, v1        ; encoding: [0xff,0x02,0x00,0x26,0x80,0x96,0x18,0x4b]
+// GFX12XX: v_and_b32_e32 v0, 0x4b189680, v1        ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b]
+// GFX11: v_and_b32_e32 v0, 0x4b189680, v1        ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b]
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 s_mov_b64_e32 s[0:1], 3.402823e+38
+// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-// SICI: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f]
-// GFX89: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x26,0xfd,0xff,0x7f,0x7f]
 v_and_b32_e32 v0, 3.402823e+38, v1
+// SICI: v_and_b32_e32 v0, 0x7f7ffffd, v1        ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f]
+// GFX89: v_and_b32_e32 v0, 0x7f7ffffd, v1        ; encoding: [0xff,0x02,0x00,0x26,0xfd,0xff,0x7f,0x7f]
+// GFX12XX: v_and_b32_e32 v0, 0x7f7ffffd, v1        ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f]
+// GFX11: v_and_b32_e32 v0, 0x7f7ffffd, v1        ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f]
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 s_mov_b64_e32 s[0:1], 2.3509886e-38
+// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-// SICI: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00]
-// GFX89: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x26,0xff,0xff,0xff,0x00]
 v_and_b32_e32 v0, 2.3509886e-38, v1
+// SICI: v_and_b32_e32 v0, 0xffffff, v1          ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00]
+// GFX89: v_and_b32_e32 v0, 0xffffff, v1          ; encoding: [0xff,0x02,0x00,0x26,0xff,0xff,0xff,0x00]
+// GFX12XX: v_and_b32_e32 v0, 0xffffff, v1          ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00]
+// GFX11: v_and_b32_e32 v0, 0xffffff, v1          ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00]
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 s_mov_b64_e32 s[0:1], 2.3509886e-70
+// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_and_b32_e32 v0, 2.3509886e-70, v1
+// NOGCN: :[[@LINE-1]]:19: error: invalid operand for instruction
+
+v_not_b16 v5.l, 1.0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_not_b16_e32 v5.l, 1.0                 ; encoding: [0xf2,0xd2,0x0a,0x7e]
+// GFX1250: v_not_b16_e32 v5.l, 1.0                 ; encoding: [0xf2,0xd2,0x0a,0x7e]
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_not_b16 v5.l, lit(1.0)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_not_b16_e32 v5.l, lit(0x3f800000)     ; encoding: [0xff,0xd2,0x0a,0x7e,0x00,0x00,0x80,0x3f]
+// GFX1250: v_not_b16_e32 v5.l, lit(0x3f800000)     ; encoding: [0xff,0xd2,0x0a,0x7e,0x00,0x00,0x80,0x3f]
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_and_b32_e32 v0, 1.0, v1
+// SICI: v_and_b32_e32 v0, 1.0, v1               ; encoding: [0xf2,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, 1.0, v1               ; encoding: [0xf2,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, 1.0, v1               ; encoding: [0xf2,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, 1.0, v1               ; encoding: [0xf2,0x02,0x00,0x36]
+
+v_and_b32_e32 v0, lit(1.0), v1
+// SICI: v_and_b32_e32 v0, lit(0x3f800000), v1   ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0x80,0x3f]
+// GFX89: v_and_b32_e32 v0, lit(0x3f800000), v1   ; encoding: [0xff,0x02,0x00,0x26,0x00,0x00,0x80,0x3f]
+// GFX12XX: v_and_b32_e32 v0, lit(0x3f800000), v1   ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0x80,0x3f]
+// GFX11: v_and_b32_e32 v0, lit(0x3f800000), v1   ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0x80,0x3f]
+
+v_pk_add_u16 v5, exec_lo, 1.0
+// GFX12XX: v_pk_add_u16 v5, exec_lo, 1.0           ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xe4,0x01,0x18]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_pk_add_u16 v5, exec_lo, 1.0           ; encoding: [0x05,0x40,0x8a,0xd3,0x7e,0xe4,0x01,0x18]
+// GFX11: v_pk_add_u16 v5, exec_lo, 1.0           ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xe4,0x01,0x18]
+// NOVI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_pk_add_u16 v5, exec_lo, lit(1.0)
+// GFX12XX: v_pk_add_u16 v5, exec_lo, lit(0x3f800000) ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x00,0x00,0x80,0x3f]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_pk_add_u16 v5, exec_lo, lit(0x3f800000) ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x00,0x00,0x80,0x3f]
+// NOVI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX9: :[[@LINE-5]]:31: error: invalid operand (violates constant bus restrictions)
+
+v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 1.0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 1.0 ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0xca,0x03]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], lit(1.0)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], lit(0x3f800000) ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0xfe,0x03,0x00,0x00,0x80,0x3f]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
 
 //---------------------------------------------------------------------------//
 // int literal, expected fp operand
 //---------------------------------------------------------------------------//
 
-// SICI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x38,0x00,0x7e]
 v_trunc_f32_e32 v0, 0
+// SICI: v_trunc_f32_e32 v0, 0                   ; encoding: [0x80,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, 0                   ; encoding: [0x80,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 0                   ; encoding: [0x80,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 0                   ; encoding: [0x80,0x42,0x00,0x7e]
+
+v_fract_f64_e32 v[0:1], 1
+// SICI: v_fract_f64_e32 v[0:1], 1               ; encoding: [0x81,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], 1               ; encoding: [0x81,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 1               ; encoding: [0x81,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], 1               ; encoding: [0x81,0x7c,0x00,0x7e]
+
+v_fract_f64_e32 v[0:1], lit(1)
+// SICI: v_fract_f64_e32 v[0:1], lit(0x1)        ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00]
+// GFX89: v_fract_f64_e32 v[0:1], lit(0x1)        ; encoding: [0xff,0x64,0x00,0x7e,0x01,0x00,0x00,0x00]
+// GFX11: v_fract_f64_e32 v[0:1], lit(0x1)        ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00]
+// GFX12: v_fract_f64_e32 v[0:1], lit(0x1)        ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00]
+// GFX1250: v_fract_f64_e32 v[0:1], lit(0x1)        ; encoding: [0xfe,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 
-// SICI: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x64,0x00,0x7e]
-v_fract_f64_e32 v[0:1], 0
-
-// SICI: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0x42,0xd3,0x80,0x00,0x00,0x00]
-// GFX89: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0x5c,0xd1,0x80,0x00,0x00,0x00]
 v_trunc_f32_e64 v0, 0
+// SICI: v_trunc_f32_e64 v0, 0                   ; encoding: [0x00,0x00,0x42,0xd3,0x80,0x00,0x00,0x00]
+// GFX89: v_trunc_f32_e64 v0, 0                   ; encoding: [0x00,0x00,0x5c,0xd1,0x80,0x00,0x00,0x00]
+// GFX12XX: v_trunc_f32_e64 v0, 0                   ; encoding: [0x00,0x00,0xa1,0xd5,0x80,0x00,0x00,0x00]
+// GFX11: v_trunc_f32_e64 v0, 0                   ; encoding: [0x00,0x00,0xa1,0xd5,0x80,0x00,0x00,0x00]
 
-// SICI: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0x7c,0xd3,0x80,0x00,0x00,0x00]
-// GFX89: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0x72,0xd1,0x80,0x00,0x00,0x00]
 v_fract_f64_e64 v[0:1], 0
+// SICI: v_fract_f64_e64 v[0:1], 0               ; encoding: [0x00,0x00,0x7c,0xd3,0x80,0x00,0x00,0x00]
+// GFX89: v_fract_f64_e64 v[0:1], 0               ; encoding: [0x00,0x00,0x72,0xd1,0x80,0x00,0x00,0x00]
+// GFX12XX: v_fract_f64_e64 v[0:1], 0               ; encoding: [0x00,0x00,0xbe,0xd5,0x80,0x00,0x00,0x00]
+// GFX11: v_fract_f64_e64 v[0:1], 0               ; encoding: [0x00,0x00,0xbe,0xd5,0x80,0x00,0x00,0x00]
 
-// SICI: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x38,0x00,0x7e]
 v_trunc_f32_e32 v0, -13
+// SICI: v_trunc_f32_e32 v0, -13                 ; encoding: [0xcd,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, -13                 ; encoding: [0xcd,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, -13                 ; encoding: [0xcd,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, -13                 ; encoding: [0xcd,0x42,0x00,0x7e]
 
-// SICI: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x64,0x00,0x7e]
 v_fract_f64_e32 v[0:1], -13
+// SICI: v_fract_f64_e32 v[0:1], -13             ; encoding: [0xcd,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], -13             ; encoding: [0xcd,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], -13             ; encoding: [0xcd,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], -13             ; encoding: [0xcd,0x7c,0x00,0x7e]
 
-// SICI: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0x42,0xd3,0xcd,0x00,0x00,0x00]
-// GFX89: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0x5c,0xd1,0xcd,0x00,0x00,0x00]
 v_trunc_f32_e64 v0, -13
+// SICI: v_trunc_f32_e64 v0, -13                 ; encoding: [0x00,0x00,0x42,0xd3,0xcd,0x00,0x00,0x00]
+// GFX89: v_trunc_f32_e64 v0, -13                 ; encoding: [0x00,0x00,0x5c,0xd1,0xcd,0x00,0x00,0x00]
+// GFX12XX: v_trunc_f32_e64 v0, -13                 ; encoding: [0x00,0x00,0xa1,0xd5,0xcd,0x00,0x00,0x00]
+// GFX11: v_trunc_f32_e64 v0, -13                 ; encoding: [0x00,0x00,0xa1,0xd5,0xcd,0x00,0x00,0x00]
 
-// SICI: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0x7c,0xd3,0xcd,0x00,0x00,0x00]
-// GFX89: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0x72,0xd1,0xcd,0x00,0x00,0x00]
 v_fract_f64_e64 v[0:1], -13
+// SICI: v_fract_f64_e64 v[0:1], -13             ; encoding: [0x00,0x00,0x7c,0xd3,0xcd,0x00,0x00,0x00]
+// GFX89: v_fract_f64_e64 v[0:1], -13             ; encoding: [0x00,0x00,0x72,0xd1,0xcd,0x00,0x00,0x00]
+// GFX12XX: v_fract_f64_e64 v[0:1], -13             ; encoding: [0x00,0x00,0xbe,0xd5,0xcd,0x00,0x00,0x00]
+// GFX11: v_fract_f64_e64 v[0:1], -13             ; encoding: [0x00,0x00,0xbe,0xd5,0xcd,0x00,0x00,0x00]
 
-// SICI: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x38,0x00,0x7e]
 v_trunc_f32_e32 v0, 35
+// SICI: v_trunc_f32_e32 v0, 35                  ; encoding: [0xa3,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, 35                  ; encoding: [0xa3,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 35                  ; encoding: [0xa3,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 35                  ; encoding: [0xa3,0x42,0x00,0x7e]
 
-// SICI: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x64,0x00,0x7e]
 v_fract_f64_e32 v[0:1], 35
+// SICI: v_fract_f64_e32 v[0:1], 35              ; encoding: [0xa3,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], 35              ; encoding: [0xa3,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 35              ; encoding: [0xa3,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], 35              ; encoding: [0xa3,0x7c,0x00,0x7e]
 
-// SICI: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0x42,0xd3,0xa3,0x00,0x00,0x00]
-// GFX89: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0x5c,0xd1,0xa3,0x00,0x00,0x00]
 v_trunc_f32_e64 v0, 35
+// SICI: v_trunc_f32_e64 v0, 35                  ; encoding: [0x00,0x00,0x42,0xd3,0xa3,0x00,0x00,0x00]
+// GFX89: v_trunc_f32_e64 v0, 35                  ; encoding: [0x00,0x00,0x5c,0xd1,0xa3,0x00,0x00,0x00]
+// GFX12XX: v_trunc_f32_e64 v0, 35                  ; encoding: [0x00,0x00,0xa1,0xd5,0xa3,0x00,0x00,0x00]
+// GFX11: v_trunc_f32_e64 v0, 35                  ; encoding: [0x00,0x00,0xa1,0xd5,0xa3,0x00,0x00,0x00]
 
-// SICI: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0x7c,0xd3,0xa3,0x00,0x00,0x00]
-// GFX89: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0x72,0xd1,0xa3,0x00,0x00,0x00]
 v_fract_f64_e64 v[0:1], 35
+// SICI: v_fract_f64_e64 v[0:1], 35              ; encoding: [0x00,0x00,0x7c,0xd3,0xa3,0x00,0x00,0x00]
+// GFX89: v_fract_f64_e64 v[0:1], 35              ; encoding: [0x00,0x00,0x72,0xd1,0xa3,0x00,0x00,0x00]
+// GFX12XX: v_fract_f64_e64 v[0:1], 35              ; encoding: [0x00,0x00,0xbe,0xd5,0xa3,0x00,0x00,0x00]
+// GFX11: v_fract_f64_e64 v[0:1], 35              ; encoding: [0x00,0x00,0xbe,0xd5,0xa3,0x00,0x00,0x00]
 
-// SICI: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00]
-// GFX89: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x38,0x00,0x7e,0xd2,0x04,0x00,0x00]
 v_trunc_f32_e32 v0, 1234
+// SICI: v_trunc_f32_e32 v0, 0x4d2               ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00]
+// GFX89: v_trunc_f32_e32 v0, 0x4d2               ; encoding: [0xff,0x38,0x00,0x7e,0xd2,0x04,0x00,0x00]
+// GFX12XX: v_trunc_f32_e32 v0, 0x4d2               ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00]
+// GFX11: v_trunc_f32_e32 v0, 0x4d2               ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00]
 
-// SICI: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00]
-// GFX89: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x64,0x00,0x7e,0xd2,0x04,0x00,0x00]
 v_fract_f64_e32 v[0:1], 1234
+// SICI: v_fract_f64_e32 v[0:1], 0x4d2           ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00]
+// GFX89: v_fract_f64_e32 v[0:1], 0x4d2           ; encoding: [0xff,0x64,0x00,0x7e,0xd2,0x04,0x00,0x00]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0x4d2           ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00]
+// GFX11: v_fract_f64_e32 v[0:1], 0x4d2           ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00]
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported
 v_trunc_f32_e64 v0, 1234
+// GFX12XX: v_trunc_f32_e64 v0, 0x4d2               ; encoding: [0x00,0x00,0xa1,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00]
+// NOSICI: :[[@LINE-2]]:21: error: literal operands are not supported
+// NOGFX89: :[[@LINE-3]]:21: error: literal operands are not supported
+// GFX11: v_trunc_f32_e64 v0, 0x4d2               ; encoding: [0x00,0x00,0xa1,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00]
+// NOSICIVI: :[[@LINE-5]]:21: error: literal operands are not supported
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported
 v_fract_f64_e64 v[0:1], 1234
+// GFX12XX: v_fract_f64_e64 v[0:1], 0x4d2           ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00]
+// NOSICI: :[[@LINE-2]]:25: error: literal operands are not supported
+// NOGFX89: :[[@LINE-3]]:25: error: literal operands are not supported
+// GFX11: v_fract_f64_e64 v[0:1], 0x4d2           ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00]
+// NOSICIVI: :[[@LINE-5]]:25: error: literal operands are not supported
 
-// SICI: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff]
-// GFX89: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x38,0x00,0x7e,0xcf,0x2b,0xff,0xff]
 v_trunc_f32_e32 v0, -54321
+// SICI: v_trunc_f32_e32 v0, 0xffff2bcf          ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff]
+// GFX89: v_trunc_f32_e32 v0, 0xffff2bcf          ; encoding: [0xff,0x38,0x00,0x7e,0xcf,0x2b,0xff,0xff]
+// GFX12XX: v_trunc_f32_e32 v0, 0xffff2bcf          ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff]
+// GFX11: v_trunc_f32_e32 v0, 0xffff2bcf          ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff]
 
-// SICI: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff]
-// GFX89: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x64,0x00,0x7e,0xcf,0x2b,0xff,0xff]
 v_fract_f64_e32 v[0:1], -54321
+// SICI: v_fract_f64_e32 v[0:1], 0xffff2bcf      ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff]
+// GFX89: v_fract_f64_e32 v[0:1], 0xffff2bcf      ; encoding: [0xff,0x64,0x00,0x7e,0xcf,0x2b,0xff,0xff]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0xffff2bcf      ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff]
+// GFX11: v_fract_f64_e32 v[0:1], 0xffff2bcf      ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff]
 
-// SICI: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde]
-// GFX89: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x38,0x00,0x7e,0xef,0xbe,0xad,0xde]
 v_trunc_f32_e32 v0, 0xdeadbeef
+// SICI: v_trunc_f32_e32 v0, 0xdeadbeef          ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde]
+// GFX89: v_trunc_f32_e32 v0, 0xdeadbeef          ; encoding: [0xff,0x38,0x00,0x7e,0xef,0xbe,0xad,0xde]
+// GFX12XX: v_trunc_f32_e32 v0, 0xdeadbeef          ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde]
+// GFX11: v_trunc_f32_e32 v0, 0xdeadbeef          ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde]
 
-// SICI: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde]
-// GFX89: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x64,0x00,0x7e,0xef,0xbe,0xad,0xde]
 v_fract_f64_e32 v[0:1], 0xdeadbeef
+// SICI: v_fract_f64_e32 v[0:1], 0xdeadbeef      ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde]
+// GFX89: v_fract_f64_e32 v[0:1], 0xdeadbeef      ; encoding: [0xff,0x64,0x00,0x7e,0xef,0xbe,0xad,0xde]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0xdeadbeef      ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde]
+// GFX11: v_fract_f64_e32 v[0:1], 0xdeadbeef      ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde]
 
-// SICI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x38,0x00,0x7e]
 v_trunc_f32_e32 v0, 0xffffffff
+// SICI: v_trunc_f32_e32 v0, -1                  ; encoding: [0xc1,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, -1                  ; encoding: [0xc1,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, -1                  ; encoding: [0xc1,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, -1                  ; encoding: [0xc1,0x42,0x00,0x7e]
 
-// SICI: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff]
-// GFX89: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xff,0xff]
 v_fract_f64_e32 v[0:1], 0xffffffff
+// SICI: v_fract_f64_e32 v[0:1], 0xffffffff      ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff]
+// GFX89: v_fract_f64_e32 v[0:1], 0xffffffff      ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xff,0xff]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0xffffffff      ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff]
+// GFX11: v_fract_f64_e32 v[0:1], 0xffffffff      ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff]
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_trunc_f32_e32 v0, 0x123456789abcdef0
+// NOGCN: :[[@LINE-1]]:21: error: invalid operand for instruction
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_fract_f64_e32 v[0:1], 0x123456789abcdef0
+// NOSICI: :[[@LINE-1]]:25: error: invalid operand for instruction
+// NOGFX89: :[[@LINE-2]]:25: error: invalid operand for instruction
+// GFX1250: v_fract_f64_e32 v[0:1], 0x123456789abcdef0 ; encoding: [0xfe,0x7c,0x00,0x7e,0xf0,0xde,0xbc,0x9a,0x78,0x56,0x34,0x12]
+// NOGFX11: :[[@LINE-4]]:25: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-5]]:25: error: invalid operand for instruction
+// NOSICIVI: :[[@LINE-6]]:25: error: invalid operand for instruction
 
-// SICI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e]
-// GFX89: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x38,0x00,0x7e]
 v_trunc_f32_e32 v0, 0xffffffffffffffff
+// SICI: v_trunc_f32_e32 v0, -1                  ; encoding: [0xc1,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, -1                  ; encoding: [0xc1,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, -1                  ; encoding: [0xc1,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, -1                  ; encoding: [0xc1,0x42,0x00,0x7e]
 
-// SICI: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x7c,0x00,0x7e]
-// GFX89: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x64,0x00,0x7e]
 v_fract_f64_e32 v[0:1], 0xffffffffffffffff
+// SICI: v_fract_f64_e32 v[0:1], -1              ; encoding: [0xc1,0x7c,0x00,0x7e]
+// GFX89: v_fract_f64_e32 v[0:1], -1              ; encoding: [0xc1,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], -1              ; encoding: [0xc1,0x7c,0x00,0x7e]
+// GFX11: v_fract_f64_e32 v[0:1], -1              ; encoding: [0xc1,0x7c,0x00,0x7e]
+
+v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1 ; encoding: [0x08,0x40,0x44,0xcc,0x00,0x09,0x06,0x1a]
+// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], lit(1)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-3]]:54: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-4]]:54: error: invalid operand for instruction
+// NOGFX1250: :[[@LINE-5]]:54: error: invalid operand for instruction
+
+v_cos_f16_e32 v5.l, 1
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+// GFX11: v_cos_f16_e32 v5.l, 1                   ; encoding: [0x81,0xc2,0x0a,0x7e]
+// GFX1250: v_cos_f16_e32 v5.l, 1                   ; encoding: [0x81,0xc2,0x0a,0x7e]
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_cos_f16_e32 v5.l, lit(1)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+// GFX11: v_cos_f16_e32 v5.l, lit(0x1)            ; encoding: [0xff,0xc2,0x0a,0x7e,0x01,0x00,0x00,0x00]
+// GFX1250: v_cos_f16_e32 v5.l, lit(0x1)            ; encoding: [0xff,0xc2,0x0a,0x7e,0x01,0x00,0x00,0x00]
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_tanh_bf16 v5, 1
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_tanh_bf16_e32 v5, 1                   ; encoding: [0x81,0x94,0x0a,0x7e]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_tanh_bf16 v5, lit(1)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_tanh_bf16_e32 v5, lit(0x1)            ; encoding: [0xff,0x94,0x0a,0x7e,0x01,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_trunc_f32_e32 v0, 1
+// SICI: v_trunc_f32_e32 v0, 1                   ; encoding: [0x81,0x42,0x00,0x7e]
+// GFX89: v_trunc_f32_e32 v0, 1                   ; encoding: [0x81,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 1                   ; encoding: [0x81,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 1                   ; encoding: [0x81,0x42,0x00,0x7e]
+
+v_trunc_f32_e32 v0, lit(1)
+// SICI: v_trunc_f32_e32 v0, lit(0x1)            ; encoding: [0xff,0x42,0x00,0x7e,0x01,0x00,0x00,0x00]
+// GFX89: v_trunc_f32_e32 v0, lit(0x1)            ; encoding: [0xff,0x38,0x00,0x7e,0x01,0x00,0x00,0x00]
+// GFX12XX: v_trunc_f32_e32 v0, lit(0x1)            ; encoding: [0xff,0x42,0x00,0x7e,0x01,0x00,0x00,0x00]
+// GFX11: v_trunc_f32_e32 v0, lit(0x1)            ; encoding: [0xff,0x42,0x00,0x7e,0x01,0x00,0x00,0x00]
+
+v_dot2_bf16_bf16 v5.l, v1, v2, 1
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, 1        ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x06,0x02]
+// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_dot2_bf16_bf16 v5.l, v1, v2, lit(1)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, lit(0x1) ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0x01,0x00,0x00,0x00]
+// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_dot2_f32_f16 v5, v1, 1, v2
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_dot2_f32_f16 v5, v1, 1, v2            ; encoding: [0x05,0x40,0x13,0xcc,0x01,0x03,0x09,0x1c]
+// GFX12: v_dot2_f32_f16 v5, v1, 1, v2            ; encoding: [0x05,0x40,0x13,0xcc,0x01,0x03,0x09,0x1c]
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_dot2_f32_f16 v5, v1, lit(1), v2
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_dot2_f32_f16 v5, v1, lit(0x1), v2     ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xff,0x09,0x1c,0x01,0x00,0x00,0x00]
+// GFX12: v_dot2_f32_f16 v5, v1, lit(0x1), v2     ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xff,0x09,0x1c,0x01,0x00,0x00,0x00]
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_cvt_pk_fp8_f16 v1.l, 1
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_cvt_pk_fp8_f16 v1.l, 1                ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_cvt_pk_fp8_f16 v1.l, lit(1)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_cvt_pk_fp8_f16 v1.l, lit(0x1)         ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
 
 //---------------------------------------------------------------------------//
 // int literal, expected int operand
 //---------------------------------------------------------------------------//
 
-// SICI: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x04,0x80,0xbe]
-// GFX89: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x01,0x80,0xbe]
 s_mov_b64_e32 s[0:1], 0
+// GFX8PLUS: s_mov_b64 s[0:1], 0                     ; encoding: [0x80,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], 0                     ; encoding: [0x80,0x04,0x80,0xbe]
 
-// SICI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x26]
 v_and_b32_e32 v0, 0, v1
+// SICI: v_and_b32_e32 v0, 0, v1                 ; encoding: [0x80,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, 0, v1                 ; encoding: [0x80,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, 0, v1                 ; encoding: [0x80,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, 0, v1                 ; encoding: [0x80,0x02,0x00,0x36]
 
-// SICI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00]
-// GFX89: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00]
 v_and_b32_e64 v0, 0, v1
+// SICI: v_and_b32_e64 v0, 0, v1                 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00]
+// GFX89: v_and_b32_e64 v0, 0, v1                 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00]
+// GFX12XX: v_and_b32_e64 v0, 0, v1                 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00]
+// GFX11: v_and_b32_e64 v0, 0, v1                 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00]
 
-// SICI: s_mov_b64 s[0:1], -13 ; encoding: [0xcd,0x04,0x80,0xbe]
-// GFX89: s_mov_b64 s[0:1], -13 ; encoding: [0xcd,0x01,0x80,0xbe]
 s_mov_b64_e32 s[0:1], -13
+// GFX8PLUS: s_mov_b64 s[0:1], -13                   ; encoding: [0xcd,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], -13                   ; encoding: [0xcd,0x04,0x80,0xbe]
 
-// SICI: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x26]
 v_and_b32_e32 v0, -13, v1
+// SICI: v_and_b32_e32 v0, -13, v1               ; encoding: [0xcd,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, -13, v1               ; encoding: [0xcd,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, -13, v1               ; encoding: [0xcd,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, -13, v1               ; encoding: [0xcd,0x02,0x00,0x36]
 
-// SICI: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xcd,0x02,0x02,0x00]
-// GFX89: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xcd,0x02,0x02,0x00]
 v_and_b32_e64 v0, -13, v1
+// SICI: v_and_b32_e64 v0, -13, v1               ; encoding: [0x00,0x00,0x36,0xd2,0xcd,0x02,0x02,0x00]
+// GFX89: v_and_b32_e64 v0, -13, v1               ; encoding: [0x00,0x00,0x13,0xd1,0xcd,0x02,0x02,0x00]
+// GFX12XX: v_and_b32_e64 v0, -13, v1               ; encoding: [0x00,0x00,0x1b,0xd5,0xcd,0x02,0x02,0x00]
+// GFX11: v_and_b32_e64 v0, -13, v1               ; encoding: [0x00,0x00,0x1b,0xd5,0xcd,0x02,0x02,0x00]
 
-// SICI: s_mov_b64 s[0:1], 35 ; encoding: [0xa3,0x04,0x80,0xbe]
-// GFX89: s_mov_b64 s[0:1], 35 ; encoding: [0xa3,0x01,0x80,0xbe]
 s_mov_b64_e32 s[0:1], 35
+// GFX8PLUS: s_mov_b64 s[0:1], 35                    ; encoding: [0xa3,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], 35                    ; encoding: [0xa3,0x04,0x80,0xbe]
 
-// SICI: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x26]
 v_and_b32_e32 v0, 35, v1
+// SICI: v_and_b32_e32 v0, 35, v1                ; encoding: [0xa3,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, 35, v1                ; encoding: [0xa3,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, 35, v1                ; encoding: [0xa3,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, 35, v1                ; encoding: [0xa3,0x02,0x00,0x36]
 
-// SICI: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xa3,0x02,0x02,0x00]
-// GFX89: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xa3,0x02,0x02,0x00]
 v_and_b32_e64 v0, 35, v1
+// SICI: v_and_b32_e64 v0, 35, v1                ; encoding: [0x00,0x00,0x36,0xd2,0xa3,0x02,0x02,0x00]
+// GFX89: v_and_b32_e64 v0, 35, v1                ; encoding: [0x00,0x00,0x13,0xd1,0xa3,0x02,0x02,0x00]
+// GFX12XX: v_and_b32_e64 v0, 35, v1                ; encoding: [0x00,0x00,0x1b,0xd5,0xa3,0x02,0x02,0x00]
+// GFX11: v_and_b32_e64 v0, 35, v1                ; encoding: [0x00,0x00,0x1b,0xd5,0xa3,0x02,0x02,0x00]
 
-// SICI: s_mov_b64 s[0:1], 0x4d2 ; encoding: [0xff,0x04,0x80,0xbe,0xd2,0x04,0x00,0x00]
-// GFX89: s_mov_b64 s[0:1], 0x4d2 ; encoding: [0xff,0x01,0x80,0xbe,0xd2,0x04,0x00,0x00]
 s_mov_b64_e32 s[0:1], 1234
+// GFX8PLUS: s_mov_b64 s[0:1], 0x4d2                 ; encoding: [0xff,0x01,0x80,0xbe,0xd2,0x04,0x00,0x00]
+// SICI: s_mov_b64 s[0:1], 0x4d2                 ; encoding: [0xff,0x04,0x80,0xbe,0xd2,0x04,0x00,0x00]
 
-// SICI: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00]
-// GFX89: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x26,0xd2,0x04,0x00,0x00]
 v_and_b32_e32 v0, 1234, v1
+// SICI: v_and_b32_e32 v0, 0x4d2, v1             ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00]
+// GFX89: v_and_b32_e32 v0, 0x4d2, v1             ; encoding: [0xff,0x02,0x00,0x26,0xd2,0x04,0x00,0x00]
+// GFX12XX: v_and_b32_e32 v0, 0x4d2, v1             ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00]
+// GFX11: v_and_b32_e32 v0, 0x4d2, v1             ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00]
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported
 v_and_b32_e64 v0, 1234, v1
+// GFX12XX: v_and_b32_e64 v0, 0x4d2, v1             ; encoding: [0x00,0x00,0x1b,0xd5,0xff,0x02,0x02,0x00,0xd2,0x04,0x00,0x00]
+// NOSICI: :[[@LINE-2]]:19: error: literal operands are not supported
+// NOGFX89: :[[@LINE-3]]:19: error: literal operands are not supported
+// GFX11: v_and_b32_e64 v0, 0x4d2, v1             ; encoding: [0x00,0x00,0x1b,0xd5,0xff,0x02,0x02,0x00,0xd2,0x04,0x00,0x00]
+// NOSICIVI: :[[@LINE-1]]:19: error: literal operands are not supported
 
-// SICI: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x04,0x80,0xbe,0xcf,0x2b,0xff,0xff]
-// GFX89: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff]
 s_mov_b64_e32 s[0:1], -54321
+// SICI: s_mov_b64 s[0:1], 0xffff2bcf            ; encoding: [0xff,0x04,0x80,0xbe,0xcf,0x2b,0xff,0xff]
+// GFX89: s_mov_b64 s[0:1], 0xffff2bcf            ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff]
+// GFX11: s_mov_b64 s[0:1], 0xffff2bcf            ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff]
+// GFX12: s_mov_b64 s[0:1], 0xffff2bcf            ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff]
+// GFX1250: s_mov_b64 s[0:1], 0xffffffffffff2bcf    ; encoding: [0xfe,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff,0xff,0xff,0xff,0xff]
 
-// SICI: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff]
-// GFX89: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x26,0xcf,0x2b,0xff,0xff]
 v_and_b32_e32 v0, -54321, v1
+// SICI: v_and_b32_e32 v0, 0xffff2bcf, v1        ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff]
+// GFX89: v_and_b32_e32 v0, 0xffff2bcf, v1        ; encoding: [0xff,0x02,0x00,0x26,0xcf,0x2b,0xff,0xff]
+// GFX12XX: v_and_b32_e32 v0, 0xffff2bcf, v1        ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff]
+// GFX11: v_and_b32_e32 v0, 0xffff2bcf, v1        ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff]
 
-// SICI: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x04,0x80,0xbe,0xef,0xbe,0xad,0xde]
-// GFX89: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde]
 s_mov_b64_e32 s[0:1], 0xdeadbeef
+// SICI: s_mov_b64 s[0:1], 0xdeadbeef            ; encoding: [0xff,0x04,0x80,0xbe,0xef,0xbe,0xad,0xde]
+// GFX89: s_mov_b64 s[0:1], 0xdeadbeef            ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde]
+// GFX11: s_mov_b64 s[0:1], 0xdeadbeef            ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde]
+// GFX12: s_mov_b64 s[0:1], 0xdeadbeef            ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde]
+// GFX1250: s_mov_b64 s[0:1], 0xdeadbeef            ; encoding: [0xfe,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde,0x00,0x00,0x00,0x00]
 
-// SICI: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde]
-// GFX89: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x26,0xef,0xbe,0xad,0xde]
 v_and_b32_e32 v0, 0xdeadbeef, v1
+// SICI: v_and_b32_e32 v0, 0xdeadbeef, v1        ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde]
+// GFX89: v_and_b32_e32 v0, 0xdeadbeef, v1        ; encoding: [0xff,0x02,0x00,0x26,0xef,0xbe,0xad,0xde]
+// GFX12XX: v_and_b32_e32 v0, 0xdeadbeef, v1        ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde]
+// GFX11: v_and_b32_e32 v0, 0xdeadbeef, v1        ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde]
 
-// SICI: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x04,0x80,0xbe,0xff,0xff,0xff,0xff]
-// GFX89: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff]
 s_mov_b64_e32 s[0:1], 0xffffffff
+// SICI: s_mov_b64 s[0:1], 0xffffffff            ; encoding: [0xff,0x04,0x80,0xbe,0xff,0xff,0xff,0xff]
+// GFX89: s_mov_b64 s[0:1], 0xffffffff            ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff]
+// GFX11: s_mov_b64 s[0:1], 0xffffffff            ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff]
+// GFX12: s_mov_b64 s[0:1], 0xffffffff            ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff]
+// GFX1250: s_mov_b64 s[0:1], 0xffffffff            ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00]
 
-// SICI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x26]
 v_and_b32_e32 v0, 0xffffffff, v1
+// SICI: v_and_b32_e32 v0, -1, v1                ; encoding: [0xc1,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, -1, v1                ; encoding: [0xc1,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, -1, v1                ; encoding: [0xc1,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, -1, v1                ; encoding: [0xc1,0x02,0x00,0x36]
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 s_mov_b64_e32 s[0:1], 0x123456789abcdef0
+// NOSICI: :[[@LINE-1]]:23: error: invalid operand for instruction
+// NOGFX89: :[[@LINE-2]]:23: error: invalid operand for instruction
+// GFX1250: s_mov_b64 s[0:1], 0x123456789abcdef0    ; encoding: [0xfe,0x01,0x80,0xbe,0xf0,0xde,0xbc,0x9a,0x78,0x56,0x34,0x12]
+// NOGFX11: :[[@LINE-4]]:23: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-5]]:23: error: invalid operand for instruction
+// NOSICIVI: :[[@LINE-1]]:23: error: invalid operand for instruction
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_and_b32_e32 v0, 0x123456789abcdef0, v1
+// NOGCN: :[[@LINE-1]]:19: error: invalid operand for instruction
 
-// SICI: s_mov_b64 s[0:1], -1 ; encoding: [0xc1,0x04,0x80,0xbe]
-// GFX89: s_mov_b64 s[0:1], -1 ; encoding: [0xc1,0x01,0x80,0xbe]
 s_mov_b64_e32 s[0:1], 0xffffffffffffffff
+// GFX8PLUS: s_mov_b64 s[0:1], -1                    ; encoding: [0xc1,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], -1                    ; encoding: [0xc1,0x04,0x80,0xbe]
 
-// SICI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36]
-// GFX89: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x26]
 v_and_b32_e32 v0, 0xffffffffffffffff, v1
+// SICI: v_and_b32_e32 v0, -1, v1                ; encoding: [0xc1,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, -1, v1                ; encoding: [0xc1,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, -1, v1                ; encoding: [0xc1,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, -1, v1                ; encoding: [0xc1,0x02,0x00,0x36]
+
+v_not_b16 v5.l, 1
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_not_b16_e32 v5.l, 1                   ; encoding: [0x81,0xd2,0x0a,0x7e]
+// GFX1250: v_not_b16_e32 v5.l, 1                   ; encoding: [0x81,0xd2,0x0a,0x7e]
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+v_not_b16 v5.l, lit(1)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_not_b16_e32 v5.l, lit(0x1)            ; encoding: [0xff,0xd2,0x0a,0x7e,0x01,0x00,0x00,0x00]
+// GFX1250: v_not_b16_e32 v5.l, lit(0x1)            ; encoding: [0xff,0xd2,0x0a,0x7e,0x01,0x00,0x00,0x00]
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+
+s_mov_b64 s[0:1], 1
+// GFX8PLUS: s_mov_b64 s[0:1], 1                     ; encoding: [0x81,0x01,0x80,0xbe]
+// SICI: s_mov_b64 s[0:1], 1                     ; encoding: [0x81,0x04,0x80,0xbe]
+
+s_mov_b64 s[0:1], lit(1)
+// SICI: s_mov_b64 s[0:1], lit(0x1)              ; encoding: [0xff,0x04,0x80,0xbe,0x01,0x00,0x00,0x00]
+// GFX89: s_mov_b64 s[0:1], lit(0x1)              ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00]
+// GFX11: s_mov_b64 s[0:1], lit(0x1)              ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00]
+// GFX12: s_mov_b64 s[0:1], lit(0x1)              ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00]
+// GFX1250: s_mov_b64 s[0:1], lit(0x1)              ; encoding: [0xfe,0x01,0x80,0xbe,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+
+v_and_b32_e32 v0, 1, v1
+// SICI: v_and_b32_e32 v0, 1, v1                 ; encoding: [0x81,0x02,0x00,0x36]
+// GFX89: v_and_b32_e32 v0, 1, v1                 ; encoding: [0x81,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, 1, v1                 ; encoding: [0x81,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, 1, v1                 ; encoding: [0x81,0x02,0x00,0x36]
+
+v_and_b32_e32 v0, lit(1), v1
+// SICI: v_and_b32_e32 v0, lit(0x1), v1          ; encoding: [0xff,0x02,0x00,0x36,0x01,0x00,0x00,0x00]
+// GFX89: v_and_b32_e32 v0, lit(0x1), v1          ; encoding: [0xff,0x02,0x00,0x26,0x01,0x00,0x00,0x00]
+// GFX12XX: v_and_b32_e32 v0, lit(0x1), v1          ; encoding: [0xff,0x02,0x00,0x36,0x01,0x00,0x00,0x00]
+// GFX11: v_and_b32_e32 v0, lit(0x1), v1          ; encoding: [0xff,0x02,0x00,0x36,0x01,0x00,0x00,0x00]
+
+v_pk_add_u16 v5, exec_lo, 1
+// GFX12XX: v_pk_add_u16 v5, exec_lo, 1             ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0x02,0x01,0x18]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_pk_add_u16 v5, exec_lo, 1             ; encoding: [0x05,0x40,0x8a,0xd3,0x7e,0x02,0x01,0x18]
+// GFX11: v_pk_add_u16 v5, exec_lo, 1             ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0x02,0x01,0x18]
+// NOVI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_pk_add_u16 v5, exec_lo, lit(1)
+// GFX12XX: v_pk_add_u16 v5, exec_lo, lit(0x1)      ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x01,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_pk_add_u16 v5, exec_lo, lit(0x1)      ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x01,0x00,0x00,0x00]
+// NOVI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX9: :[[@LINE-5]]:31: error: invalid operand (violates constant bus restrictions)
+
+v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 1
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 1 ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0x06,0x02]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+
+v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], lit(1)
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], lit(0x1) ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0xfe,0x03,0x01,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
 
 //---------------------------------------------------------------------------//
 // 1/(2*PI)
 //---------------------------------------------------------------------------//
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_trunc_f32_e32 v0, 0x3fc45f306dc9c882
+// NOGCN: :[[@LINE-1]]:21: error: invalid operand for instruction
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// GFX89: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x64,0x00,0x7e]
 v_fract_f64_e32 v[0:1], 0x3fc45f306dc9c882
+// GFX89: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e]
+// NOSICI: :[[@LINE-3]]:25: error: invalid operand for instruction
+// GFX11: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e]
+// NOSICIVI: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e]
-// GFX89: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x38,0x00,0x7e]
 v_trunc_f32_e32 v0, 0x3e22f983
+// SICI: v_trunc_f32_e32 v0, 0x3e22f983          ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e]
+// GFX89: v_trunc_f32_e32 v0, 0.15915494          ; encoding: [0xf8,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 0.15915494          ; encoding: [0xf8,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 0.15915494          ; encoding: [0xf8,0x42,0x00,0x7e]
 
-// SICI: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e]
-// GFX89: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x64,0x00,0x7e,0x83,0xf9,0x22,0x3e]
 v_fract_f64_e32 v[0:1], 0x3e22f983
+// SICI: v_fract_f64_e32 v[0:1], 0x3e22f983      ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e]
+// GFX89: v_fract_f64_e32 v[0:1], 0x3e22f983      ; encoding: [0xff,0x64,0x00,0x7e,0x83,0xf9,0x22,0x3e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0x3e22f983      ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e]
+// GFX11: v_fract_f64_e32 v[0:1], 0x3e22f983      ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e]
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_trunc_f32_e64 v0, 0x3fc45f306dc9c882
+// NOGCN: :[[@LINE-1]]:21: error: invalid operand for instruction
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// GFX89: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0x72,0xd1,0xf8,0x00,0x00,0x00]
 v_fract_f64_e64 v[0:1], 0x3fc45f306dc9c882
+// GFX89: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0x72,0xd1,0xf8,0x00,0x00,0x00]
+// GFX12XX: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0xbe,0xd5,0xf8,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-3]]:25: error: invalid operand for instruction
+// GFX11: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0xbe,0xd5,0xf8,0x00,0x00,0x00]
+// NOSICIVI: :[[@LINE-2]]:25: error: invalid operand for instruction
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported
-// GFX89: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0x5c,0xd1,0xf8,0x00,0x00,0x00]
 v_trunc_f32_e64 v0, 0x3e22f983
+// GFX89: v_trunc_f32_e64 v0, 0.15915494          ; encoding: [0x00,0x00,0x5c,0xd1,0xf8,0x00,0x00,0x00]
+// GFX12XX: v_trunc_f32_e64 v0, 0.15915494          ; encoding: [0x00,0x00,0xa1,0xd5,0xf8,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-3]]:21: error: literal operands are not supported
+// GFX11: v_trunc_f32_e64 v0, 0.15915494          ; encoding: [0x00,0x00,0xa1,0xd5,0xf8,0x00,0x00,0x00]
+// NOSICIVI: :[[@LINE-2]]:21: error: literal operands are not supported
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported
 v_fract_f64_e64 v[0:1], 0x3e22f983
+// GFX12XX: v_fract_f64_e64 v[0:1], 0x3e22f983      ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0x83,0xf9,0x22,0x3e]
+// NOSICI: :[[@LINE-2]]:25: error: literal operands are not supported
+// NOGFX89: :[[@LINE-3]]:25: error: literal operands are not supported
+// GFX11: v_fract_f64_e64 v[0:1], 0x3e22f983      ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0x83,0xf9,0x22,0x3e]
+// NOSICIVI: :[[@LINE-1]]:25: error: literal operands are not supported
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// GFX89: s_mov_b64 s[0:1], 0.15915494309189532 ; encoding: [0xf8,0x01,0x80,0xbe]
 s_mov_b64_e32 s[0:1], 0.159154943091895317852646485335
+// GFX8PLUS: s_mov_b64 s[0:1], 0.15915494309189532   ; encoding: [0xf8,0x01,0x80,0xbe]
+// NOSICI: :[[@LINE-2]]:23: error: invalid operand for instruction
+// NOSICIVI: :[[@LINE-2]]:23: error: invalid operand for instruction
 
-// SICI: v_and_b32_e32 v0, 0x3e22f983, v1 ; encoding: [0xff,0x02,0x00,0x36,0x83,0xf9,0x22,0x3e]
-// GFX89: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x26]
 v_and_b32_e32 v0, 0.159154943091895317852646485335, v1
+// SICI: v_and_b32_e32 v0, 0x3e22f983, v1        ; encoding: [0xff,0x02,0x00,0x36,0x83,0xf9,0x22,0x3e]
+// GFX89: v_and_b32_e32 v0, 0.15915494, v1        ; encoding: [0xf8,0x02,0x00,0x26]
+// GFX12XX: v_and_b32_e32 v0, 0.15915494, v1        ; encoding: [0xf8,0x02,0x00,0x36]
+// GFX11: v_and_b32_e32 v0, 0.15915494, v1        ; encoding: [0xf8,0x02,0x00,0x36]
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported
-// GFX89: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf8,0x02,0x02,0x00]
 v_and_b32_e64 v0, 0.159154943091895317852646485335, v1
+// GFX89: v_and_b32_e64 v0, 0.15915494, v1        ; encoding: [0x00,0x00,0x13,0xd1,0xf8,0x02,0x02,0x00]
+// GFX12XX: v_and_b32_e64 v0, 0.15915494, v1        ; encoding: [0x00,0x00,0x1b,0xd5,0xf8,0x02,0x02,0x00]
+// NOSICI: :[[@LINE-3]]:19: error: literal operands are not supported
+// GFX11: v_and_b32_e64 v0, 0.15915494, v1        ; encoding: [0x00,0x00,0x1b,0xd5,0xf8,0x02,0x02,0x00]
+// NOSICIVI: :[[@LINE-2]]:19: error: literal operands are not supported
 
-// SICI: v_fract_f64_e32 v[0:1], 0x3fc45f30 ; encoding: [0xff,0x7c,0x00,0x7e,0x30,0x5f,0xc4,0x3f]
-// GFX89: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x64,0x00,0x7e]
 v_fract_f64 v[0:1], 0.159154943091895317852646485335
+// SICI: v_fract_f64_e32 v[0:1], 0x3fc45f30      ; encoding: [0xff,0x7c,0x00,0x7e,0x30,0x5f,0xc4,0x3f]
+// GFX89: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x64,0x00,0x7e]
+// GFX12XX: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e]
+// NOSICI: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
+// GFX11: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e]
+// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero
 
-// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e]
-// GFX89: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x38,0x00,0x7e]
 v_trunc_f32 v0, 0.159154943091895317852646485335
+// SICI: v_trunc_f32_e32 v0, 0x3e22f983          ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e]
+// GFX89: v_trunc_f32_e32 v0, 0.15915494          ; encoding: [0xf8,0x38,0x00,0x7e]
+// GFX12XX: v_trunc_f32_e32 v0, 0.15915494          ; encoding: [0xf8,0x42,0x00,0x7e]
+// GFX11: v_trunc_f32_e32 v0, 0.15915494          ; encoding: [0xf8,0x42,0x00,0x7e]
+
+v_trunc_f32 v0, lit(0.159154943091895317852646485335)
+// SICI: v_trunc_f32_e32 v0, lit(0x3e22f983)     ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e]
+// GFX89: v_trunc_f32_e32 v0, lit(0x3e22f983)     ; encoding: [0xff,0x38,0x00,0x7e,0x83,0xf9,0x22,0x3e]
+// GFX12XX: v_trunc_f32_e32 v0, lit(0x3e22f983)     ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e]
+// GFX11: v_trunc_f32_e32 v0, lit(0x3e22f983)     ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e]
 
 //---------------------------------------------------------------------------//
 // integer literal truncation checks
 //---------------------------------------------------------------------------//
 
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 s_mov_b32 s0, 0x101ffffffff
+// NOGCN: :[[@LINE-1]]:15: error: invalid operand for instruction
 
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 s_mov_b32 s0, 0x1000000001
+// NOGCN: :[[@LINE-1]]:15: error: invalid operand for instruction
 
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 s_mov_b32 s0, 0x1000000fff
+// NOGCN: :[[@LINE-1]]:15: error: invalid operand for instruction
 
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_trunc_f32 v0, 0x1fffffffff0
+// NOGCN: :[[@LINE-1]]:17: error: invalid operand for instruction
 
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_trunc_f32 v0, 0x100000001
+// NOGCN: :[[@LINE-1]]:17: error: invalid operand for instruction
 
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_trunc_f32 v0, 0x1fffffff000
+// NOGCN: :[[@LINE-1]]:17: error: invalid operand for instruction
 
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 s_mov_b64 s[0:1], 0x101ffffffff
+// NOSICI: :[[@LINE-1]]:19: error: invalid operand for instruction
+// NOGFX89: :[[@LINE-2]]:19: error: invalid operand for instruction
+// GFX1250: s_mov_b64 s[0:1], 0x101ffffffff         ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0xff,0xff,0xff,0x01,0x01,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:19: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-5]]:19: error: invalid operand for instruction
+// NOSICIVI: :[[@LINE-1]]:19: error: invalid operand for instruction
 
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 s_mov_b64 s[0:1], 0x1000000001
+// NOSICI: :[[@LINE-1]]:19: error: invalid operand for instruction
+// NOGFX89: :[[@LINE-2]]:19: error: invalid operand for instruction
+// GFX1250: s_mov_b64 s[0:1], 0x1000000001          ; encoding: [0xfe,0x01,0x80,0xbe,0x01,0x00,0x00,0x00,0x10,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:19: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-5]]:19: error: invalid operand for instruction
+// NOSICIVI: :[[@LINE-1]]:19: error: invalid operand for instruction
 
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 s_mov_b64 s[0:1], 0x1000000fff
+// NOSICI: :[[@LINE-1]]:19: error: invalid operand for instruction
+// NOGFX89: :[[@LINE-2]]:19: error: invalid operand for instruction
+// GFX1250: s_mov_b64 s[0:1], 0x1000000fff          ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0x0f,0x00,0x00,0x10,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-4]]:19: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-5]]:19: error: invalid operand for instruction
+// NOSICIVI: :[[@LINE-1]]:19: error: invalid operand for instruction
 
-// NOGFX89: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOSI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOCIVI: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_trunc_f64 v[0:1], 0x1fffffffff0
+// NOGFX89: :[[@LINE-1]]:21: error: invalid operand for instruction
+// GFX1250: v_trunc_f64_e32 v[0:1], 0x1fffffffff0   ; encoding: [0xfe,0x2e,0x00,0x7e,0xf0,0xff,0xff,0xff,0xff,0x01,0x00,0x00]
+// NOSI: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOCI: :[[@LINE-4]]:21: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-5]]:21: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-6]]:21: error: invalid operand for instruction
+// NOCIVI: :[[@LINE-4]]:21: error: invalid operand for instruction
 
-// NOGFX89: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOSI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOCIVI: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_trunc_f64 v[0:1], 0x100000001
+// NOGFX89: :[[@LINE-1]]:21: error: invalid operand for instruction
+// GFX1250: v_trunc_f64_e32 v[0:1], 0x100000001     ; encoding: [0xfe,0x2e,0x00,0x7e,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
+// NOSI: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOCI: :[[@LINE-4]]:21: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-5]]:21: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-6]]:21: error: invalid operand for instruction
+// NOCIVI: :[[@LINE-4]]:21: error: invalid operand for instruction
 
-// NOGFX89: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOSI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOCIVI: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_trunc_f64 v[0:1], 0x1fffffff000
+// NOGFX89: :[[@LINE-1]]:21: error: invalid operand for instruction
+// GFX1250: v_trunc_f64_e32 v[0:1], 0x1fffffff000   ; encoding: [0xfe,0x2e,0x00,0x7e,0x00,0xf0,0xff,0xff,0xff,0x01,0x00,0x00]
+// NOSI: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOCI: :[[@LINE-4]]:21: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-5]]:21: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-6]]:21: error: invalid operand for instruction
+// NOCIVI: :[[@LINE-4]]:21: error: invalid operand for instruction
 
 //---------------------------------------------------------------------------//
 // named inline values: scc, vccz, execz
 //---------------------------------------------------------------------------//
 
+buffer_atomic_add v0, off, s[0:3], scc offset:4095
 // SICI: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0xc8,0xe0,0x00,0x00,0x00,0xfd]
 // GFX89: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xfd]
-buffer_atomic_add v0, off, s[0:3], scc offset:4095
+// GFX12XX: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0x7d,0x40,0x0d,0xc4,0x00,0x00,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX11: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0xd4,0xe0,0x00,0x00,0x00,0xfd]
 
-// SICI: s_add_i32 s0, src_vccz, s0      ; encoding: [0xfb,0x00,0x00,0x81]
-// GFX89: s_add_i32 s0, src_vccz, s0      ; encoding: [0xfb,0x00,0x00,0x81]
 s_add_i32 s0, vccz, s0
+// SICI: s_add_i32 s0, src_vccz, s0              ; encoding: [0xfb,0x00,0x00,0x81]
+// GFX89: s_add_i32 s0, src_vccz, s0              ; encoding: [0xfb,0x00,0x00,0x81]
+// NOGFX11: :[[@LINE-3]]:15: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:15: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:15: error: src_vccz register not available on this GPU
 
-// SICI: s_add_i32 s0, src_execz, s0      ; encoding: [0xfc,0x00,0x00,0x81]
-// GFX89: s_add_i32 s0, src_execz, s0      ; encoding: [0xfc,0x00,0x00,0x81]
 s_add_i32 s0, execz, s0
+// SICI: s_add_i32 s0, src_execz, s0             ; encoding: [0xfc,0x00,0x00,0x81]
+// GFX89: s_add_i32 s0, src_execz, s0             ; encoding: [0xfc,0x00,0x00,0x81]
+// NOGFX11: :[[@LINE-3]]:15: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:15: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:15: error: src_execz register not available on this GPU
 
-// SICI: s_add_i32 s0, src_scc, s0       ; encoding: [0xfd,0x00,0x00,0x81]
-// GFX89: s_add_i32 s0, src_scc, s0       ; encoding: [0xfd,0x00,0x00,0x81]
 s_add_i32 s0, scc, s0
+// SICI: s_add_i32 s0, src_scc, s0               ; encoding: [0xfd,0x00,0x00,0x81]
+// GFX89: s_add_i32 s0, src_scc, s0               ; encoding: [0xfd,0x00,0x00,0x81]
+// GFX12XX: s_add_co_i32 s0, src_scc, s0            ; encoding: [0xfd,0x00,0x00,0x81]
+// GFX11: s_add_i32 s0, src_scc, s0               ; encoding: [0xfd,0x00,0x00,0x81]
 
-// SICI: s_and_b64 s[0:1], s[0:1], src_vccz ; encoding: [0x00,0xfb,0x80,0x87]
-// GFX89: s_and_b64 s[0:1], s[0:1], src_vccz ; encoding: [0x00,0xfb,0x80,0x86]
 s_and_b64 s[0:1], s[0:1], src_vccz
+// SICI: s_and_b64 s[0:1], s[0:1], src_vccz      ; encoding: [0x00,0xfb,0x80,0x87]
+// GFX89: s_and_b64 s[0:1], s[0:1], src_vccz      ; encoding: [0x00,0xfb,0x80,0x86]
+// NOGFX11: :[[@LINE-3]]:27: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:27: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:27: error: src_vccz register not available on this GPU
 
-// SICI: s_and_b64 s[0:1], s[0:1], src_execz ; encoding: [0x00,0xfc,0x80,0x87]
-// GFX89: s_and_b64 s[0:1], s[0:1], src_execz ; encoding: [0x00,0xfc,0x80,0x86]
 s_and_b64 s[0:1], s[0:1], src_execz
+// SICI: s_and_b64 s[0:1], s[0:1], src_execz     ; encoding: [0x00,0xfc,0x80,0x87]
+// GFX89: s_and_b64 s[0:1], s[0:1], src_execz     ; encoding: [0x00,0xfc,0x80,0x86]
+// NOGFX11: :[[@LINE-3]]:27: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:27: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:27: error: src_execz register not available on this GPU
 
-// SICI: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x87]
-// GFX89: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x86]
 s_and_b64 s[0:1], s[0:1], src_scc
+// SICI: s_and_b64 s[0:1], s[0:1], src_scc       ; encoding: [0x00,0xfd,0x80,0x87]
+// GFX89: s_and_b64 s[0:1], s[0:1], src_scc       ; encoding: [0x00,0xfd,0x80,0x86]
+// GFX12XX: s_and_b64 s[0:1], s[0:1], src_scc       ; encoding: [0x00,0xfd,0x80,0x8b]
+// GFX11: s_and_b64 s[0:1], s[0:1], src_scc       ; encoding: [0x00,0xfd,0x80,0x8b]
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// GFX89: v_add_u16_e32 v0, src_vccz, v0  ; encoding: [0xfb,0x00,0x00,0x4c]
 v_add_u16 v0, vccz, v0
+// GFX89: v_add_u16_e32 v0, src_vccz, v0          ; encoding: [0xfb,0x00,0x00,0x4c]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
-// NOSICI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// GFX9: v_add_u16_sdwa v0, src_scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xfd,0x06,0x86,0x06]
 v_add_u16_sdwa v0, scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u16_sdwa v0, src_scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xfd,0x06,0x86,0x06]
+// NOVI: :[[@LINE-3]]:20: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// NOSICI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// GFX9: v_add_u16_sdwa v0, v0, src_scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xfa,0x01,0x4c,0x00,0x06,0x06,0x86]
 v_add_u16_sdwa v0, v0, scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u16_sdwa v0, v0, src_scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xfa,0x01,0x4c,0x00,0x06,0x06,0x86]
+// NOVI: :[[@LINE-3]]:24: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// GFX9: v_add_u32_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x68]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 v_add_u32 v0, execz, v0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u32_e32 v0, src_execz, v0         ; encoding: [0xfc,0x00,0x00,0x68]
+// NOVI: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode
+// NOGFX11: :[[@LINE-4]]:15: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-5]]:15: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-6]]:15: error: src_execz register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// GFX9: v_add_u32_e64 v0, src_scc, v0   ; encoding: [0x00,0x00,0x34,0xd1,0xfd,0x00,0x02,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 v_add_u32_e64 v0, scc, v0
+// GFX12XX: v_add_nc_u32_e64 v0, src_scc, v0        ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x02,0x00]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u32_e64 v0, src_scc, v0           ; encoding: [0x00,0x00,0x34,0xd1,0xfd,0x00,0x02,0x00]
+// GFX11: v_add_nc_u32_e64 v0, src_scc, v0        ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x02,0x00]
+// NOVI: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// SICI: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0x44,0x7d]
-// GFX89: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0xc4,0x7d]
 v_cmp_eq_i64 vcc, scc, v[0:1]
+// SICI: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1]   ; encoding: [0xfd,0x00,0x44,0x7d]
+// GFX89: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1]   ; encoding: [0xfd,0x00,0xc4,0x7d]
+// NOGFX11: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode
+// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// GFX89: v_max_f16_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x5a]
 v_max_f16 v0, execz, v0
+// GFX89: v_max_f16_e32 v0, src_execz, v0         ; encoding: [0xfc,0x00,0x00,0x5a]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-3]]:15: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:15: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:15: error: src_execz register not available on this GPU
+// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
-// SICI: v_max_f32_e32 v0, src_vccz, v0  ; encoding: [0xfb,0x00,0x00,0x20]
-// GFX89: v_max_f32_e32 v0, src_vccz, v0  ; encoding: [0xfb,0x00,0x00,0x16]
 v_max_f32 v0, vccz, v0
+// SICI: v_max_f32_e32 v0, src_vccz, v0          ; encoding: [0xfb,0x00,0x00,0x20]
+// GFX89: v_max_f32_e32 v0, src_vccz, v0          ; encoding: [0xfb,0x00,0x00,0x16]
+// NOGFX11: :[[@LINE-3]]:15: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:15: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:15: error: src_vccz register not available on this GPU
 
-// SICI: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0xce,0xd2,0xfd,0x00,0x02,0x00]
-// GFX89: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xfd,0x00,0x02,0x00]
 v_max_f64 v[0:1], scc, v[0:1]
+// SICI: v_max_f64 v[0:1], src_scc, v[0:1]       ; encoding: [0x00,0x00,0xce,0xd2,0xfd,0x00,0x02,0x00]
+// GFX89: v_max_f64 v[0:1], src_scc, v[0:1]       ; encoding: [0x00,0x00,0x83,0xd2,0xfd,0x00,0x02,0x00]
+// GFX12XX: v_max_num_f64_e32 v[0:1], src_scc, v[0:1] ; encoding: [0xfd,0x00,0x00,0x1c]
+// GFX11: v_max_f64 v[0:1], src_scc, v[0:1]       ; encoding: [0x00,0x00,0x2a,0xd7,0xfd,0x00,0x02,0x00]
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// GFX9: v_pk_add_f16 v0, src_execz, v0  ; encoding: [0x00,0x40,0x8f,0xd3,0xfc,0x00,0x02,0x18]
 v_pk_add_f16 v0, execz, v0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_pk_add_f16 v0, src_execz, v0          ; encoding: [0x00,0x40,0x8f,0xd3,0xfc,0x00,0x02,0x18]
+// NOVI: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-4]]:18: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-5]]:18: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-6]]:18: error: src_execz register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// GFX89: v_ceil_f16_e64 v0, -src_vccz    ; encoding: [0x00,0x00,0x85,0xd1,0xfb,0x00,0x00,0x20]
 v_ceil_f16 v0, neg(vccz)
+// GFX89: v_ceil_f16_e64 v0, -src_vccz            ; encoding: [0x00,0x00,0x85,0xd1,0xfb,0x00,0x00,0x20]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-3]]:20: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:20: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:20: error: src_vccz register not available on this GPU
+// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// GFX89: v_ceil_f16_e64 v0, |src_scc|    ; encoding: [0x00,0x01,0x85,0xd1,0xfd,0x00,0x00,0x00]
 v_ceil_f16 v0, abs(scc)
+// GFX89: v_ceil_f16_e64 v0, |src_scc|            ; encoding: [0x00,0x01,0x85,0xd1,0xfd,0x00,0x00,0x00]
+// GFX12XX: v_ceil_f16_e64 v0, |src_scc|            ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// GFX11: v_ceil_f16_e64 v0, |src_scc|            ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
+// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
-// NOSI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// CI: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x30,0xd3,0xfc,0x00,0x00,0x00]
-// GFX89: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x58,0xd1,0xfc,0x00,0x00,0x00]
 v_ceil_f64 v[5:6], |execz|
+// GFX89: v_ceil_f64_e64 v[5:6], |src_execz|      ; encoding: [0x05,0x01,0x58,0xd1,0xfc,0x00,0x00,0x00]
+// CI: v_ceil_f64_e64 v[5:6], |src_execz|      ; encoding: [0x05,0x01,0x30,0xd3,0xfc,0x00,0x00,0x00]
+// NOSI: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX11: :[[@LINE-4]]:21: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-5]]:21: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-6]]:21: error: src_execz register not available on this GPU
 
-// NOSI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// CI: v_ceil_f64_e64 v[5:6], -vcc     ; encoding: [0x05,0x00,0x30,0xd3,0x6a,0x00,0x00,0x20]
-// GFX89: v_ceil_f64_e64 v[5:6], -vcc     ; encoding: [0x05,0x00,0x58,0xd1,0x6a,0x00,0x00,0x20]
 v_ceil_f64 v[5:6], -vcc
+// GFX89: v_ceil_f64_e64 v[5:6], -vcc             ; encoding: [0x05,0x00,0x58,0xd1,0x6a,0x00,0x00,0x20]
+// CI: v_ceil_f64_e64 v[5:6], -vcc             ; encoding: [0x05,0x00,0x30,0xd3,0x6a,0x00,0x00,0x20]
+// GFX11: v_ceil_f64_e64 v[5:6], -vcc             ; encoding: [0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x20]
+// GFX12: v_ceil_f64_e64 v[5:6], -vcc             ; encoding: [0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x20]
+// NOSI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-6]]:12: error: invalid operand for instruction
 
-// SICI: v_ceil_f32_e64 v0, -src_vccz    ; encoding: [0x00,0x00,0x44,0xd3,0xfb,0x00,0x00,0x20]
-// GFX89: v_ceil_f32_e64 v0, -src_vccz    ; encoding: [0x00,0x00,0x5d,0xd1,0xfb,0x00,0x00,0x20]
 v_ceil_f32 v0, -vccz
+// SICI: v_ceil_f32_e64 v0, -src_vccz            ; encoding: [0x00,0x00,0x44,0xd3,0xfb,0x00,0x00,0x20]
+// GFX89: v_ceil_f32_e64 v0, -src_vccz            ; encoding: [0x00,0x00,0x5d,0xd1,0xfb,0x00,0x00,0x20]
+// NOGFX11: :[[@LINE-3]]:17: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:17: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:17: error: src_vccz register not available on this GPU
 
-// SICI: v_ceil_f32_e64 v0, |src_execz|  ; encoding: [0x00,0x01,0x44,0xd3,0xfc,0x00,0x00,0x00]
-// GFX89: v_ceil_f32_e64 v0, |src_execz|  ; encoding: [0x00,0x01,0x5d,0xd1,0xfc,0x00,0x00,0x00]
 v_ceil_f32 v0, |execz|
+// SICI: v_ceil_f32_e64 v0, |src_execz|          ; encoding: [0x00,0x01,0x44,0xd3,0xfc,0x00,0x00,0x00]
+// GFX89: v_ceil_f32_e64 v0, |src_execz|          ; encoding: [0x00,0x01,0x5d,0xd1,0xfc,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-3]]:17: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:17: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:17: error: src_execz register not available on this GPU
 
-// NOSICI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// GFX9: v_ceil_f16_sdwa v5, |src_vccz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfb,0x16,0xa6,0x00]
 v_ceil_f16_sdwa v5, |vccz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_ceil_f16_sdwa v5, |src_vccz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfb,0x16,0xa6,0x00]
+// NOVI: :[[@LINE-3]]:22: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// NOSICI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction
-// GFX9: v_ceil_f16_sdwa v5, -src_scc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfd,0x16,0x96,0x00]
 v_ceil_f16_sdwa v5, -scc dst_sel:DWORD dst_unused:UNUSED_PRESERVE
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_ceil_f16_sdwa v5, -src_scc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfd,0x16,0x96,0x00]
+// NOVI: :[[@LINE-3]]:22: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// GFX9: v_ceil_f32_sdwa v5, src_vccz dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfb,0x16,0x86,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_ceil_f32_sdwa v5, vccz dst_sel:DWORD src0_sel:DWORD
+// NOSICI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
+// GFX9: v_ceil_f32_sdwa v5, src_vccz dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfb,0x16,0x86,0x00]
+// NOVI: :[[@LINE-3]]:21: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOSICIVI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
 
-// GFX9: v_ceil_f32_sdwa v5, |src_execz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfc,0x16,0xa6,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
 v_ceil_f32_sdwa v5, |execz| dst_sel:DWORD src0_sel:DWORD
+// NOSICI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
+// GFX9: v_ceil_f32_sdwa v5, |src_execz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfc,0x16,0xa6,0x00]
+// NOVI: :[[@LINE-3]]:22: error: invalid operand for instruction
+// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOSICIVI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
 
 //---------------------------------------------------------------------------//
 // named inline values: shared_base, shared_limit, private_base, etc
 //---------------------------------------------------------------------------//
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// GFX9: buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xeb]
 buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095
+// NOSICI: :[[@LINE-1]]:36: error: src_shared_base register not available on this GPU
+// GFX9: buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xeb]
+// GFX11: buffer_atomic_add_u32 v0, off, s[0:3], src_shared_base offset:4095 ; encoding: [0xff,0x0f,0xd4,0xe0,0x00,0x00,0x00,0xeb]
+// NOVI: :[[@LINE-4]]:36: error: src_shared_base register not available on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-6]]:1: error: operands are not valid for this GPU or mode
+// NOSICIVI: :[[@LINE-1]]:36: error: src_shared_base register not available on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// GFX9: s_add_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81]
 s_add_i32 s0, src_shared_base, s0
+// GFX12XX: s_add_co_i32 s0, src_shared_base, s0    ; encoding: [0xeb,0x00,0x00,0x81]
+// NOSICI: :[[@LINE-2]]:15: error: src_shared_base register not available on this GPU
+// GFX9: s_add_i32 s0, src_shared_base, s0       ; encoding: [0xeb,0x00,0x00,0x81]
+// GFX11: s_add_i32 s0, src_shared_base, s0       ; encoding: [0xeb,0x00,0x00,0x81]
+// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:15: error: src_shared_base register not available on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_limit register not available on this GPU
-// GFX9: s_add_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81]
 s_add_i32 s0, src_shared_limit, s0
+// GFX12XX: s_add_co_i32 s0, src_shared_limit, s0   ; encoding: [0xec,0x00,0x00,0x81]
+// NOSICI: :[[@LINE-2]]:15: error: src_shared_limit register not available on this GPU
+// GFX9: s_add_i32 s0, src_shared_limit, s0      ; encoding: [0xec,0x00,0x00,0x81]
+// GFX11: s_add_i32 s0, src_shared_limit, s0      ; encoding: [0xec,0x00,0x00,0x81]
+// NOVI: :[[@LINE-5]]:15: error: src_shared_limit register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:15: error: src_shared_limit register not available on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_base register not available on this GPU
-// GFX9: s_add_i32 s0, src_private_base, s0 ; encoding: [0xed,0x00,0x00,0x81]
 s_add_i32 s0, src_private_base, s0
+// GFX12XX: s_add_co_i32 s0, src_private_base, s0   ; encoding: [0xed,0x00,0x00,0x81]
+// NOSICI: :[[@LINE-2]]:15: error: src_private_base register not available on this GPU
+// GFX9: s_add_i32 s0, src_private_base, s0      ; encoding: [0xed,0x00,0x00,0x81]
+// GFX11: s_add_i32 s0, src_private_base, s0      ; encoding: [0xed,0x00,0x00,0x81]
+// NOVI: :[[@LINE-5]]:15: error: src_private_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:15: error: src_private_base register not available on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_limit register not available on this GPU
-// GFX9: s_add_i32 s0, src_private_limit, s0 ; encoding: [0xee,0x00,0x00,0x81]
 s_add_i32 s0, src_private_limit, s0
+// GFX12XX: s_add_co_i32 s0, src_private_limit, s0  ; encoding: [0xee,0x00,0x00,0x81]
+// NOSICI: :[[@LINE-2]]:15: error: src_private_limit register not available on this GPU
+// GFX9: s_add_i32 s0, src_private_limit, s0     ; encoding: [0xee,0x00,0x00,0x81]
+// GFX11: s_add_i32 s0, src_private_limit, s0     ; encoding: [0xee,0x00,0x00,0x81]
+// NOVI: :[[@LINE-5]]:15: error: src_private_limit register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:15: error: src_private_limit register not available on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_pops_exiting_wave_id register not available on this GPU
-// GFX9: s_add_i32 s0, src_pops_exiting_wave_id, s0 ; encoding: [0xef,0x00,0x00,0x81]
 s_add_i32 s0, src_pops_exiting_wave_id, s0
+// NOSICI: :[[@LINE-1]]:15: error: src_pops_exiting_wave_id register not available on this GPU
+// GFX9: s_add_i32 s0, src_pops_exiting_wave_id, s0 ; encoding: [0xef,0x00,0x00,0x81]
+// NOVI: :[[@LINE-3]]:15: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:15: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX12: :[[@LINE-5]]:15: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX1250: :[[@LINE-6]]:15: error: src_pops_exiting_wave_id register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:15: error: src_pops_exiting_wave_id register not available on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// GFX9: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x86]
 s_and_b64 s[0:1], s[0:1], src_shared_base
+// GFX12XX: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x8b]
+// NOSICI: :[[@LINE-2]]:27: error: src_shared_base register not available on this GPU
+// GFX9: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x86]
+// GFX11: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x8b]
+// NOVI: :[[@LINE-5]]:27: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:27: error: src_shared_base register not available on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_limit register not available on this GPU
-// GFX9: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x86]
 s_and_b64 s[0:1], s[0:1], src_shared_limit
+// GFX12XX: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x8b]
+// NOSICI: :[[@LINE-2]]:27: error: src_shared_limit register not available on this GPU
+// GFX9: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x86]
+// GFX11: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x8b]
+// NOVI: :[[@LINE-5]]:27: error: src_shared_limit register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:27: error: src_shared_limit register not available on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_base register not available on this GPU
-// GFX9: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x86]
 s_and_b64 s[0:1], s[0:1], src_private_base
+// GFX12XX: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x8b]
+// NOSICI: :[[@LINE-2]]:27: error: src_private_base register not available on this GPU
+// GFX9: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x86]
+// GFX11: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x8b]
+// NOVI: :[[@LINE-5]]:27: error: src_private_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:27: error: src_private_base register not available on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_limit register not available on this GPU
-// GFX9: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x86]
 s_and_b64 s[0:1], s[0:1], src_private_limit
+// GFX12XX: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x8b]
+// NOSICI: :[[@LINE-2]]:27: error: src_private_limit register not available on this GPU
+// GFX9: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x86]
+// GFX11: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x8b]
+// NOVI: :[[@LINE-5]]:27: error: src_private_limit register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:27: error: src_private_limit register not available on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_pops_exiting_wave_id register not available on this GPU
-// GFX9: s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id ; encoding: [0x00,0xef,0x80,0x86]
 s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id
+// NOSICI: :[[@LINE-1]]:27: error: src_pops_exiting_wave_id register not available on this GPU
+// GFX9: s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id ; encoding: [0x00,0xef,0x80,0x86]
+// NOVI: :[[@LINE-3]]:27: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:27: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX12: :[[@LINE-5]]:27: error: src_pops_exiting_wave_id register not available on this GPU
+// NOGFX1250: :[[@LINE-6]]:27: error: src_pops_exiting_wave_id register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:27: error: src_pops_exiting_wave_id register not available on this GPU
 
-// GFX9: v_add_u16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4c]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
 v_add_u16 v0, src_shared_base, v0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u16_e32 v0, src_shared_base, v0   ; encoding: [0xeb,0x00,0x00,0x4c]
+// NOVI: :[[@LINE-3]]:15: error: src_shared_base register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// GFX9: v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xeb,0x06,0x86,0x06]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
 v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xeb,0x06,0x86,0x06]
+// NOVI: :[[@LINE-3]]:20: error: src_shared_base register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// GFX9: v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xd6,0x01,0x4c,0x00,0x06,0x06,0x86]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
 v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xd6,0x01,0x4c,0x00,0x06,0x06,0x86]
+// NOVI: :[[@LINE-3]]:24: error: src_shared_base register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// GFX9: v_add_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x68]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
 v_add_u32 v0, src_shared_base, v0
+// GFX12XX: v_add_nc_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4a]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u32_e32 v0, src_shared_base, v0   ; encoding: [0xeb,0x00,0x00,0x68]
+// GFX11: v_add_nc_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4a]
+// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// GFX9: v_add_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x34,0xd1,0xeb,0x00,0x02,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
 v_add_u32_e64 v0, src_shared_base, v0
+// GFX12XX: v_add_nc_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x25,0xd5,0xeb,0x00,0x02,0x00]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_add_u32_e64 v0, src_shared_base, v0   ; encoding: [0x00,0x00,0x34,0xd1,0xeb,0x00,0x02,0x00]
+// GFX11: v_add_nc_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x25,0xd5,0xeb,0x00,0x02,0x00]
+// NOVI: :[[@LINE-5]]:19: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// GFX9: v_cmp_eq_i64_e32 vcc, src_shared_base, v[0:1] ; encoding: [0xeb,0x00,0xc4,0x7d]
 v_cmp_eq_i64 vcc, src_shared_base, v[0:1]
+// NOSICI: :[[@LINE-1]]:19: error: src_shared_base register not available on this GPU
+// GFX9: v_cmp_eq_i64_e32 vcc, src_shared_base, v[0:1] ; encoding: [0xeb,0x00,0xc4,0x7d]
+// NOVI: :[[@LINE-3]]:19: error: src_shared_base register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode
+// NOGFX1250: :[[@LINE-6]]:1: error: operands are not valid for this GPU or mode
+// NOSICIVI: :[[@LINE-1]]:19: error: src_shared_base register not available on this GPU
 
-// GFX9: v_max_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x5a]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
 v_max_f16 v0, src_shared_base, v0
+// GFX12XX: v_max_num_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x62]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_max_f16_e32 v0, src_shared_base, v0   ; encoding: [0xeb,0x00,0x00,0x5a]
+// GFX11: v_max_f16_e32 v0, src_shared_base, v0   ; encoding: [0xeb,0x00,0x00,0x72]
+// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// GFX9: v_max_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x16]
 v_max_f32 v0, src_shared_base, v0
+// GFX12XX: v_max_num_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x2c]
+// NOSICI: :[[@LINE-2]]:15: error: src_shared_base register not available on this GPU
+// GFX9: v_max_f32_e32 v0, src_shared_base, v0   ; encoding: [0xeb,0x00,0x00,0x16]
+// GFX11: v_max_f32_e32 v0, src_shared_base, v0   ; encoding: [0xeb,0x00,0x00,0x20]
+// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:15: error: src_shared_base register not available on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// GFX9: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xeb,0x00,0x02,0x00]
 v_max_f64 v[0:1], src_shared_base, v[0:1]
+// GFX12XX: v_max_num_f64_e32 v[0:1], src_shared_base, v[0:1] ; encoding: [0xeb,0x00,0x00,0x1c]
+// NOSICI: :[[@LINE-2]]:19: error: src_shared_base register not available on this GPU
+// GFX9: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xeb,0x00,0x02,0x00]
+// GFX11: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: [0x00,0x00,0x2a,0xd7,0xeb,0x00,0x02,0x00]
+// NOVI: :[[@LINE-5]]:19: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:19: error: src_shared_base register not available on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// GFX9: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x40,0x8f,0xd3,0xeb,0x00,0x02,0x18]
 v_pk_add_f16 v0, src_shared_base, v0
+// GFX12XX: v_pk_add_f16 v0, src_shared_base, v0    ; encoding: [0x00,0x40,0x0f,0xcc,0xeb,0x00,0x02,0x18]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_pk_add_f16 v0, src_shared_base, v0    ; encoding: [0x00,0x40,0x8f,0xd3,0xeb,0x00,0x02,0x18]
+// GFX11: v_pk_add_f16 v0, src_shared_base, v0    ; encoding: [0x00,0x40,0x0f,0xcc,0xeb,0x00,0x02,0x18]
+// NOVI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// GFX9: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x85,0xd1,0xeb,0x00,0x00,0x20]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
 v_ceil_f16 v0, neg(src_shared_base)
+// GFX12XX: v_ceil_f16_e64 v0, -src_shared_base     ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_ceil_f16_e64 v0, -src_shared_base     ; encoding: [0x00,0x00,0x85,0xd1,0xeb,0x00,0x00,0x20]
+// GFX11: v_ceil_f16_e64 v0, -src_shared_base     ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
+// NOVI: :[[@LINE-5]]:20: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// GFX9: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x85,0xd1,0xeb,0x00,0x00,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
 v_ceil_f16 v0, abs(src_shared_base)
+// GFX12XX: v_ceil_f16_e64 v0, |src_shared_base|    ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX9: v_ceil_f16_e64 v0, |src_shared_base|    ; encoding: [0x00,0x01,0x85,0xd1,0xeb,0x00,0x00,0x00]
+// GFX11: v_ceil_f16_e64 v0, |src_shared_base|    ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
+// NOVI: :[[@LINE-5]]:20: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// GFX9: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x58,0xd1,0xeb,0x00,0x00,0x00]
-// NOSI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOCIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
 v_ceil_f64 v[5:6], |src_shared_base|
+// GFX9: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x58,0xd1,0xeb,0x00,0x00,0x00]
+// GFX11: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x98,0xd5,0xeb,0x00,0x00,0x00]
+// GFX12: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x98,0xd5,0xeb,0x00,0x00,0x00]
+// NOSI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOCI: :[[@LINE-5]]:21: error: src_shared_base register not available on this GPU
+// NOVI: :[[@LINE-6]]:21: error: src_shared_base register not available on this GPU
+// NOGFX1250: :[[@LINE-7]]:12: error: invalid operand for instruction
+// NOCIVI: :[[@LINE-5]]:21: error: src_shared_base register not available on this GPU
 
-// GFX9: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x58,0xd1,0xeb,0x00,0x00,0x20]
-// NOSI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOCIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
 v_ceil_f64 v[5:6], -src_shared_base
+// GFX9: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x58,0xd1,0xeb,0x00,0x00,0x20]
+// GFX11: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x98,0xd5,0xeb,0x00,0x00,0x20]
+// GFX12: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x98,0xd5,0xeb,0x00,0x00,0x20]
+// NOSI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOCI: :[[@LINE-5]]:21: error: src_shared_base register not available on this GPU
+// NOVI: :[[@LINE-6]]:21: error: src_shared_base register not available on this GPU
+// NOGFX1250: :[[@LINE-7]]:12: error: invalid operand for instruction
+// NOCIVI: :[[@LINE-5]]:21: error: src_shared_base register not available on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// GFX9: v_ceil_f32_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x5d,0xd1,0xeb,0x00,0x00,0x20]
 v_ceil_f32 v0, -src_shared_base
+// GFX12XX: v_ceil_f32_e64 v0, -src_shared_base     ; encoding: [0x00,0x00,0xa2,0xd5,0xeb,0x00,0x00,0x20]
+// NOSICI: :[[@LINE-2]]:17: error: src_shared_base register not available on this GPU
+// GFX9: v_ceil_f32_e64 v0, -src_shared_base     ; encoding: [0x00,0x00,0x5d,0xd1,0xeb,0x00,0x00,0x20]
+// GFX11: v_ceil_f32_e64 v0, -src_shared_base     ; encoding: [0x00,0x00,0xa2,0xd5,0xeb,0x00,0x00,0x20]
+// NOVI: :[[@LINE-5]]:17: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:17: error: src_shared_base register not available on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// GFX9: v_ceil_f32_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x5d,0xd1,0xeb,0x00,0x00,0x00]
 v_ceil_f32 v0, |src_shared_base|
+// GFX12XX: v_ceil_f32_e64 v0, |src_shared_base|    ; encoding: [0x00,0x01,0xa2,0xd5,0xeb,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-2]]:17: error: src_shared_base register not available on this GPU
+// GFX9: v_ceil_f32_e64 v0, |src_shared_base|    ; encoding: [0x00,0x01,0x5d,0xd1,0xeb,0x00,0x00,0x00]
+// GFX11: v_ceil_f32_e64 v0, |src_shared_base|    ; encoding: [0x00,0x01,0xa2,0xd5,0xeb,0x00,0x00,0x00]
+// NOVI: :[[@LINE-5]]:17: error: src_shared_base register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:17: error: src_shared_base register not available on this GPU
 
-// GFX9: v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0xa6,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
 v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0xa6,0x00]
+// NOVI: :[[@LINE-3]]:22: error: src_shared_base register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// GFX9: v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0x96,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
 v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// GFX9: v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0x96,0x00]
+// NOVI: :[[@LINE-3]]:22: error: src_shared_base register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// GFX9: v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0x86,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
 v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD src0_sel:DWORD
+// NOSICI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
+// GFX9: v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0x86,0x00]
+// NOVI: :[[@LINE-3]]:21: error: src_shared_base register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOSICIVI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
 
-// GFX9: v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0xa6,0x00]
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
 v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD src0_sel:DWORD
+// NOSICI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
+// GFX9: v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0xa6,0x00]
+// NOVI: :[[@LINE-3]]:22: error: src_shared_base register not available on this GPU
+// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported
+// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported
+// NOSICIVI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported
 
 //---------------------------------------------------------------------------//
 // named inline values compete with other scalars for constant bus access
 //---------------------------------------------------------------------------//
 
-// NOGFX9: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_private_base register not available on this GPU
 v_add_u32 v0, private_base, s0
+// GFX12XX: v_add_nc_u32_e64 v0, src_private_base, s0 ; encoding: [0x00,0x00,0x25,0xd5,0xed,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_add_nc_u32_e64 v0, src_private_base, s0 ; encoding: [0x00,0x00,0x25,0xd5,0xed,0x00,0x00,0x00]
+// NOVI: :[[@LINE-4]]:15: error: src_private_base register not available on this GPU
+// NOGFX9: :[[@LINE-5]]:29: error: invalid operand (violates constant bus restrictions)
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// NOGFX9: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 v_add_u32 v0, scc, s0
+// GFX12XX: v_add_nc_u32_e64 v0, src_scc, s0        ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x00,0x00]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_add_nc_u32_e64 v0, src_scc, s0        ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x00,0x00]
+// NOVI: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX9: :[[@LINE-5]]:20: error: invalid operand (violates constant bus restrictions)
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
 // v_div_fmas implicitly reads VCC
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
 v_div_fmas_f32 v0, shared_base, v0, v1
+// GFX12XX: v_div_fmas_f32 v0, src_shared_base, v0, v1 ; encoding: [0x00,0x00,0x37,0xd6,0xeb,0x00,0x06,0x04]
+// NOSICI: :[[@LINE-2]]:20: error: src_shared_base register not available on this GPU
+// GFX11: v_div_fmas_f32 v0, src_shared_base, v0, v1 ; encoding: [0x00,0x00,0x37,0xd6,0xeb,0x00,0x06,0x04]
+// NOVI: :[[@LINE-4]]:20: error: src_shared_base register not available on this GPU
+// NOGFX9: :[[@LINE-5]]:20: error: invalid operand (violates constant bus restrictions)
+// NOSICIVI: :[[@LINE-1]]:20: error: src_shared_base register not available on this GPU
 
 // v_div_fmas implicitly reads VCC
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_limit register not available on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
 v_div_fmas_f32 v0, v0, shared_limit, v1
+// GFX12XX: v_div_fmas_f32 v0, v0, src_shared_limit, v1 ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xd9,0x05,0x04]
+// NOSICI: :[[@LINE-2]]:24: error: src_shared_limit register not available on this GPU
+// GFX11: v_div_fmas_f32 v0, v0, src_shared_limit, v1 ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xd9,0x05,0x04]
+// NOVI: :[[@LINE-4]]:24: error: src_shared_limit register not available on this GPU
+// NOGFX9: :[[@LINE-5]]:24: error: invalid operand (violates constant bus restrictions)
+// NOSICIVI: :[[@LINE-1]]:24: error: src_shared_limit register not available on this GPU
 
 // v_div_fmas implicitly reads VCC
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_limit register not available on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
 v_div_fmas_f32 v0, v0, v1, private_limit
+// GFX12XX: v_div_fmas_f32 v0, v0, v1, src_private_limit ; encoding: [0x00,0x00,0x37,0xd6,0x00,0x03,0xba,0x03]
+// NOSICI: :[[@LINE-2]]:28: error: src_private_limit register not available on this GPU
+// GFX11: v_div_fmas_f32 v0, v0, v1, src_private_limit ; encoding: [0x00,0x00,0x37,0xd6,0x00,0x03,0xba,0x03]
+// NOVI: :[[@LINE-4]]:28: error: src_private_limit register not available on this GPU
+// NOGFX9: :[[@LINE-5]]:28: error: invalid operand (violates constant bus restrictions)
+// NOSICIVI: :[[@LINE-1]]:28: error: src_private_limit register not available on this GPU
 
 // v_div_fmas implicitly reads VCC
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
 v_div_fmas_f32 v0, execz, v0, v1
+// NOSICI: :[[@LINE-1]]:20: error: invalid operand (violates constant bus restrictions)
+// NOGFX89: :[[@LINE-2]]:20: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-3]]:20: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:20: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:20: error: src_execz register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:20: error: invalid operand (violates constant bus restrictions)
 
 // v_div_fmas implicitly reads VCC
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
 v_div_fmas_f32 v0, v0, scc, v1
+// GFX12XX: v_div_fmas_f32 v0, v0, src_scc, v1      ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xfb,0x05,0x04]
+// NOSICI: :[[@LINE-2]]:24: error: invalid operand (violates constant bus restrictions)
+// NOGFX89: :[[@LINE-3]]:24: error: invalid operand (violates constant bus restrictions)
+// GFX11: v_div_fmas_f32 v0, v0, src_scc, v1      ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xfb,0x05,0x04]
+// NOSICIVI: :[[@LINE-1]]:24: error: invalid operand (violates constant bus restrictions)
 
 // v_div_fmas implicitly reads VCC
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
 v_div_fmas_f32 v0, v0, v1, vccz
+// NOSICI: :[[@LINE-1]]:28: error: invalid operand (violates constant bus restrictions)
+// NOGFX89: :[[@LINE-2]]:28: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-3]]:28: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:28: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:28: error: src_vccz register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:28: error: invalid operand (violates constant bus restrictions)
 
 // v_addc_co_u32 implicitly reads VCC (VOP2)
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
 v_addc_co_u32 v0, vcc, shared_base, v0, vcc
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX9: :[[@LINE-3]]:24: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
 v_madak_f32 v0, shared_base, v0, 0x11213141
+// NOSICI: :[[@LINE-1]]:17: error: src_shared_base register not available on this GPU
+// NOVI: :[[@LINE-2]]:17: error: src_shared_base register not available on this GPU
+// NOGFX9: :[[@LINE-3]]:17: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:17: error: src_shared_base register not available on this GPU
 
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
 v_madak_f32 v0, scc, v0, 0x11213141
+// NOSICI: :[[@LINE-1]]:17: error: invalid operand (violates constant bus restrictions)
+// NOGFX89: :[[@LINE-2]]:17: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:17: error: invalid operand (violates constant bus restrictions)
 
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
 v_madak_f32 v0, 0xff32ff, v0, 0x11213141
+// NOSICI: :[[@LINE-1]]:31: error: only one unique literal operand is allowed
+// NOGFX89: :[[@LINE-2]]:31: error: only one unique literal operand is allowed
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:31: error: only one unique literal operand is allowed
 
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
 v_madak_f32 v0, 0xff32ff, v0, 1
+// NOSICI: :[[@LINE-1]]:31: error: only one unique literal operand is allowed
+// NOGFX89: :[[@LINE-2]]:31: error: only one unique literal operand is allowed
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:31: error: only one unique literal operand is allowed
 
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
 v_madmk_f32 v0, 0xff32ff, 0x11213141, v0
+// NOSICI: :[[@LINE-1]]:27: error: only one unique literal operand is allowed
+// NOGFX89: :[[@LINE-2]]:27: error: only one unique literal operand is allowed
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:27: error: only one unique literal operand is allowed
 
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
 v_madmk_f32 v0, 0xff32ff, -1, v0
+// NOSICI: :[[@LINE-1]]:27: error: only one unique literal operand is allowed
+// NOGFX89: :[[@LINE-2]]:27: error: only one unique literal operand is allowed
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:27: error: only one unique literal operand is allowed
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
 v_madak_f16 v0, 0xff32, v0, 0x1122
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:29: error: only one unique literal operand is allowed
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
 v_madak_f16 v0, 0xff32, v0, 0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:29: error: only one unique literal operand is allowed
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
 v_madmk_f16 v0, 0xff32, 0x1122, v0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:25: error: only one unique literal operand is allowed
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed
 v_madmk_f16 v0, 0xff32, 1, v0
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-2]]:25: error: only one unique literal operand is allowed
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_base register not available on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
 v_cmp_eq_f32 s[0:1], private_base, private_limit
+// NOSICI: :[[@LINE-1]]:22: error: src_private_base register not available on this GPU
+// NOVI: :[[@LINE-2]]:22: error: src_private_base register not available on this GPU
+// NOGFX9: :[[@LINE-3]]:36: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-4]]:14: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-5]]:14: error: invalid operand for instruction
+// NOGFX1250: :[[@LINE-6]]:14: error: invalid operand for instruction
+// NOSICIVI: :[[@LINE-1]]:22: error: src_private_base register not available on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_base register not available on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
 v_cmp_eq_f32 s[0:1], private_base, s0
+// NOSICI: :[[@LINE-1]]:22: error: src_private_base register not available on this GPU
+// NOVI: :[[@LINE-2]]:22: error: src_private_base register not available on this GPU
+// NOGFX9: :[[@LINE-3]]:36: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-4]]:14: error: invalid operand for instruction
+// NOGFX12: :[[@LINE-5]]:14: error: invalid operand for instruction
+// NOGFX1250: :[[@LINE-6]]:14: error: invalid operand for instruction
+// NOSICIVI: :[[@LINE-1]]:22: error: src_private_base register not available on this GPU
 
-// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
 v_cmp_eq_f32 s[0:1], execz, s0
+// NOSICI: :[[@LINE-1]]:29: error: invalid operand (violates constant bus restrictions)
+// NOGFX89: :[[@LINE-2]]:29: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-3]]:22: error: src_execz register not available on this GPU
+// NOGFX12: :[[@LINE-4]]:22: error: src_execz register not available on this GPU
+// NOGFX1250: :[[@LINE-5]]:22: error: src_execz register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:29: error: invalid operand (violates constant bus restrictions)
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
 v_pk_add_f16 v255, private_base, private_limit
+// GFX12XX: v_pk_add_f16 v255, src_private_base, src_private_limit ; encoding: [0xff,0x40,0x0f,0xcc,0xed,0xdc,0x01,0x18]
+// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// GFX11: v_pk_add_f16 v255, src_private_base, src_private_limit ; encoding: [0xff,0x40,0x0f,0xcc,0xed,0xdc,0x01,0x18]
+// NOVI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX9: :[[@LINE-5]]:34: error: invalid operand (violates constant bus restrictions)
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
-// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU
-// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
 v_pk_add_f16 v255, vccz, execz
+// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+// NOGFX9: :[[@LINE-3]]:26: error: invalid operand (violates constant bus restrictions)
+// NOGFX11: :[[@LINE-4]]:20: error: src_vccz register not available on this GPU
+// NOGFX12: :[[@LINE-5]]:20: error: src_vccz register not available on this GPU
+// NOGFX1250: :[[@LINE-6]]:20: error: src_vccz register not available on this GPU
+// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
 //---------------------------------------------------------------------------//
-// check dummy lit() syntax for sp3 compatibility.
+// check lit() syntax.
 //---------------------------------------------------------------------------//
 
-// SICI: v_sqrt_f32_e32 v2, 0x7b                 ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX89: v_sqrt_f32_e32 v2, 0x7b                 ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00]
 v_sqrt_f32 v2, lit(123)
+// SICI: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX89: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX12XX: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX11: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
 
-// SICI: v_sqrt_f32_e32 v2, 0x7b                 ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX89: v_sqrt_f32_e32 v2, 0x7b                 ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00]
 v_sqrt_f32 v2, abs(lit(123))
+// SICI: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX89: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX12XX: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX11: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
 
-// SICI: v_sqrt_f32_e32 v2, 0x42f60000           ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42
-// GFX89: v_sqrt_f32_e32 v2, 0x42f60000           ; encoding: [0xff,0x4e,0x04,0x7e,0x00,0x00,0xf6,0x42]
 v_sqrt_f32 v2, lit(123.0)
+// SICI: v_sqrt_f32_e32 v2, lit(0x42f60000)      ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
+// GFX89: v_sqrt_f32_e32 v2, lit(0x42f60000)      ; encoding: [0xff,0x4e,0x04,0x7e,0x00,0x00,0xf6,0x42]
+// GFX12XX: v_sqrt_f32_e32 v2, lit(0x42f60000)      ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
+// GFX11: v_sqrt_f32_e32 v2, lit(0x42f60000)      ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
 
-// SICI: v_sqrt_f64_e32 v[2:3], 0x405ec000       ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
-// GFX89: v_sqrt_f64_e32 v[2:3], 0x405ec000       ; encoding: [0xff,0x50,0x04,0x7e,0x00,0xc0,0x5e,0x40]
 v_sqrt_f64 v[2:3], lit(123.0)
+// SICI: v_sqrt_f64_e32 v[2:3], lit(0x405ec000)  ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
+// GFX89: v_sqrt_f64_e32 v[2:3], lit(0x405ec000)  ; encoding: [0xff,0x50,0x04,0x7e,0x00,0xc0,0x5e,0x40]
+// GFX11: v_sqrt_f64_e32 v[2:3], lit(0x405ec000)  ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
+// GFX12: v_sqrt_f64_e32 v[2:3], lit(0x405ec000)  ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
+// GFX1250: v_sqrt_f64_e32 v[2:3], lit(0x405ec000)  ; encoding: [0xfe,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40,0x00,0x00,0x00,0x00]
 
-// SICI: v_sqrt_f64_e32 v[2:3], 0x7b             ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX89: v_sqrt_f64_e32 v[2:3], 0x7b             ; encoding: [0xff,0x50,0x04,0x7e,0x7b,0x00,0x00,0x00]
 v_sqrt_f64 v[2:3], lit(123)
+// SICI: v_sqrt_f64_e32 v[2:3], lit(0x7b)        ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX89: v_sqrt_f64_e32 v[2:3], lit(0x7b)        ; encoding: [0xff,0x50,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX11: v_sqrt_f64_e32 v[2:3], lit(0x7b)        ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX12: v_sqrt_f64_e32 v[2:3], lit(0x7b)        ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX1250: v_sqrt_f64_e32 v[2:3], lit(0x7b)        ; encoding: [0xfe,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: expected left paren after lit
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: expected left paren after lit
 v_sqrt_f32 v2, lit 123.0
+// NOGCN: :[[@LINE-1]]:20: error: expected left paren after lit
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: expected closing parentheses
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: expected closing parentheses
 v_sqrt_f32 v2, lit(123.0
+// NOGCN: :[[@LINE-1]]:25: error: expected closing parentheses
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: expected immediate with lit modifier
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: expected immediate with lit modifier
 v_sqrt_f32 v2, lit(v1)
+// NOGCN: :[[@LINE-1]]:20: error: expected immediate with lit modifier
 
 // Make sure lit() is accepted on operands without modifiers.
 
-// SICI: v_madak_f32 v4, 0x7e8, v8, 0x7e8        ; encoding: [0xff,0x10,0x08,0x42,0xe8,0x07,0x00,0x00]
-// GFX89: v_madak_f32 v4, 0x7e8, v8, 0x7e8        ; encoding: [0xff,0x10,0x08,0x30,0xe8,0x07,0x00,0x00]
 v_madak_f32 v4, lit(0x7e8), v8, lit(0x7e8)
+// SICI: v_madak_f32 v4, lit(0x7e8), v8, lit(0x7e8) ; encoding: [0xff,0x10,0x08,0x42,0xe8,0x07,0x00,0x00]
+// GFX89: v_madak_f32 v4, lit(0x7e8), v8, lit(0x7e8) ; encoding: [0xff,0x10,0x08,0x30,0xe8,0x07,0x00,0x00]
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
 
-// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: not a valid operand.
-// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand.
 v_madak_f32 v4, lit(lit(0x7e8)), v8, lit(0x7e8)
+// NOSICI: :[[@LINE-1]]:24: error: not a valid operand.
+// NOGFX89: :[[@LINE-2]]:24: error: not a valid operand.
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICIVI: :[[@LINE-1]]:24: error: not a valid operand.
diff --git a/llvm/test/MC/AMDGPU/misaligned-vgpr-tuples-err.s b/llvm/test/MC/AMDGPU/misaligned-vgpr-tuples-err.s
index c935c37..dbaddc1 100644
--- a/llvm/test/MC/AMDGPU/misaligned-vgpr-tuples-err.s
+++ b/llvm/test/MC/AMDGPU/misaligned-vgpr-tuples-err.s
@@ -1,103 +1,103 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=GFX90A --implicit-check-not=error: %s
 
 v_add_f64 v[1:2], v[1:2], v[1:2]
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 global_load_dwordx2 v[1:2], v[0:1], off
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 global_load_dwordx3 v[1:3], v[0:1], off
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 global_load_dwordx4 v[1:4], v[0:1], off
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 global_load_dwordx2 a[1:2], v[0:1], off
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 global_load_dwordx3 a[1:3], v[0:1], off
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 global_load_dwordx4 a[1:4], v[0:1], off
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 
 image_load v[1:2], v2, s[0:7] dmask:0x3 unorm
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 image_load v[1:3], v2, s[0:7] dmask:0x7 unorm
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 image_load v[1:4], v2, s[0:7] dmask:0xf unorm
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 image_load a[1:2], v2, s[0:7] dmask:0x3 unorm
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 image_load a[1:3], v2, s[0:7] dmask:0x7 unorm
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 image_load a[1:4], v2, s[0:7] dmask:0xf unorm
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 
 image_store v[193:194], v[238:241], s[28:35] dmask:0x3 unorm
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 image_store v[193:195], v[238:241], s[28:35] dmask:0x7 unorm
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 image_store v[193:196], v[238:241], s[28:35] dmask:0xf unorm
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 image_store a[193:194], v[238:241], s[28:35] dmask:0x3 unorm
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 image_store a[193:195], v[238:241], s[28:35] dmask:0x7 unorm
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 image_store a[193:196], v[238:241], s[28:35] dmask:0xf unorm
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 
 image_atomic_swap v4, v[193:196], s[28:35] dmask:0x1 unorm glc
 // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 image_atomic_swap v[5:6], v1, s[8:15] dmask:0x3 unorm
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 
 image_atomic_cmpswap v[5:6], v[192:195], s[28:35] dmask:0x3 unorm glc
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 image_atomic_cmpswap v[4:5], v[193:196], s[28:35] dmask:0x3 unorm glc
 // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 image_atomic_cmpswap v[5:8], v[192:195], s[28:35] dmask:0xf unorm glc
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 image_atomic_cmpswap v[4:7], v[193:196], s[28:35] dmask:0xf unorm glc
 // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 
 image_atomic_cmpswap a[5:6], v[192:195], s[28:35] dmask:0x3 unorm glc
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 image_atomic_cmpswap a[4:5], v[193:196], s[28:35] dmask:0x3 unorm glc
 // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 image_atomic_cmpswap a[5:8], v[192:195], s[28:35] dmask:0xf unorm glc
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 image_atomic_cmpswap a[4:7], v[193:196], s[28:35] dmask:0xf unorm glc
 // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
 
 
 v_mfma_f32_32x32x8f16 a[0:15], a[1:2], v[0:1], a[0:15]
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 v_mfma_i32_4x4x4i8 a[1:4], a0, v1, 2
-// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
 v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[17:32]
 // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned
diff --git a/llvm/test/MC/AMDGPU/vop3-gfx9.s b/llvm/test/MC/AMDGPU/vop3-gfx9.s
index f98f33a..50a7433 100644
--- a/llvm/test/MC/AMDGPU/vop3-gfx9.s
+++ b/llvm/test/MC/AMDGPU/vop3-gfx9.s
@@ -566,6 +566,141 @@ v_interp_p2_f16 v5, v2, attr0.x, v3 clamp
 // NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
 // VI: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x76,0xd2,0x00,0x04,0x0e,0x04]
 
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3     ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,1,0]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3     ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,1,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,0] ; encoding: [0x05,0x28,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,1,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,0] ; encoding: [0x05,0x28,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,0]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,0] ; encoding: [0x05,0x28,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,0]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3     ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,0]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,1] ; encoding: [0x05,0x60,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,1,0,0]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3     ; encoding: [0x05,0x00,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,1,0,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,1,1,0]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,1,1,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,1] ; encoding: [0x05,0x60,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,0]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,1] ; encoding: [0x05,0x48,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,0]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,0] ; encoding: [0x05,0x28,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,1] ; encoding: [0x05,0x68,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,0,0]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,0,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,1] ; encoding: [0x05,0x48,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,1,0]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,0] ; encoding: [0x05,0x28,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
+v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,1,1,1]
+// GFX9: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,1] ; encoding: [0x05,0x68,0x77,0xd2,0x00,0x04,0x0e,0x04]
+// NOSICI: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-3]]:{{[0-9]+}}: error: not a valid operand.
+
 v_interp_p2_legacy_f16 v5, v2, attr31.x, v3
 // GFX9: v_interp_p2_legacy_f16 v5, v2, attr31.x, v3 ; encoding: [0x05,0x00,0x76,0xd2,0x1f,0x04,0x0e,0x04]
 // NOGCN: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/vop3-literal.s b/llvm/test/MC/AMDGPU/vop3-literal.s
index 56e71b9..dd6be544 100644
--- a/llvm/test/MC/AMDGPU/vop3-literal.s
+++ b/llvm/test/MC/AMDGPU/vop3-literal.s
@@ -3,6 +3,7 @@
 // RUN: not llvm-mc -triple=amdgcn %s -show-encoding -mcpu=gfx900 | FileCheck %s -check-prefix=GFX9
 // RUN: not llvm-mc -triple=amdgcn %s -show-encoding -mcpu=gfx1010 -mattr=+wavefrontsize64 | FileCheck %s -check-prefix=GFX10
 // RUN: not llvm-mc -triple=amdgcn %s -show-encoding -mcpu=gfx1250 -mattr=+wavefrontsize64 | FileCheck %s -check-prefix=GFX1250
+// RUN: not llvm-mc -triple=amdgcn %s -show-encoding -mcpu=gfx1250 -mattr=+wavefrontsize64 | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize64 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s
 
 // RUN: not llvm-mc -triple=amdgcn %s -filetype=null -no-warn 2>&1 -mcpu=gfx900 | FileCheck %s -implicit-check-not=error: -check-prefix=GFX9-ERR
 // RUN: not llvm-mc -triple=amdgcn %s -filetype=null -no-warn 2>&1 -mcpu=gfx1010 -mattr=+wavefrontsize64 | FileCheck %s -implicit-check-not=error: -check-prefix=GFX10-ERR
@@ -185,7 +186,7 @@ v_add_f16_e64 v0, 0xfe0b, neg(0xfe0b)
 
 v_add_f64 v[0:1], 1.23456, v[0:1]
 // GFX10: v_add_f64 v[0:1], 0x3ff3c0c1, v[0:1]    ; encoding: [0x00,0x00,0x64,0xd5,0xff,0x00,0x02,0x00,0xc1,0xc0,0xf3,0x3f]
-// GFX1250: v_add_f64_e32 v[0:1], lit64(0x3ff3c0c1fc8f3238), v[0:1] ; encoding: [0xfe,0x00,0x00,0x04,0x38,0x32,0x8f,0xfc,0xc1,0xc0,0xf3,0x3f]
+// GFX1250: v_add_f64_e32 v[0:1], 0x3ff3c0c1fc8f3238, v[0:1] ; encoding: [0xfe,0x00,0x00,0x04,0x38,0x32,0x8f,0xfc,0xc1,0xc0,0xf3,0x3f]
 // GFX9-ERR: :[[@LINE-3]]:19: error: literal operands are not supported
 
 v_add_f64 v[0:1], v[0:1], -abs(1.23456)
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_salu_lit64.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_salu_lit64.txt
index 7064479..d44400e 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_salu_lit64.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_salu_lit64.txt
@@ -2,55 +2,55 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
 
 0xfe,0x01,0x82,0xbe,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_mov_b64 s[2:3], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x01,0x82,0xbe,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_mov_b64 s[2:3], 0x10abcdef12345678    ; encoding: [0xfe,0x01,0x82,0xbe,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0x04,0xfe,0x82,0xa9,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_add_nc_u64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0xa9,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_add_nc_u64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0xa9,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x04,0x82,0x8b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_and_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x8b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_and_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x8b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfe,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_and_not1_b64 s[2:3], lit64(0x10abcdef12345678), lit64(0x10abcdef12345678) ; encoding: [0xfe,0xfe,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_and_not1_b64 s[2:3], 0x10abcdef12345678, 0x10abcdef12345678 ; encoding: [0xfe,0xfe,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x04,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_and_not1_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_and_not1_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x04,0x82,0x86,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_ashr_i64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x86,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_ashr_i64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x86,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x85,0x82,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x80
-# GFX1250: s_bfe_i64 s[2:3], lit64(0x80abcdef12345678), 5 ; encoding: [0xfe,0x85,0x82,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x80]
+# GFX1250: s_bfe_i64 s[2:3], 0x80abcdef12345678, 5 ; encoding: [0xfe,0x85,0x82,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x80]
 
 0xfe,0x85,0x02,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_bfe_u64 s[2:3], lit64(0x10abcdef12345678), 5 ; encoding: [0xfe,0x85,0x02,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_bfe_u64 s[2:3], 0x10abcdef12345678, 5 ; encoding: [0xfe,0x85,0x02,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0x04,0xfe,0x82,0x98,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_cselect_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x98,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_cselect_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x98,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x04,0x82,0x84,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_lshl_b64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x84,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_lshl_b64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x84,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x04,0x82,0x85,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_lshr_b64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x85,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_lshr_b64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x85,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x04,0x82,0xaa,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_mul_u64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0xaa,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_mul_u64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0xaa,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0x04,0xfe,0x82,0x8e,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_nand_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8e,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_nand_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8e,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0x04,0xfe,0x82,0x8f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_nor_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_nor_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0x04,0xfe,0x82,0x8c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_or_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_or_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_or_not1_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_or_not1_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0x04,0xfe,0x82,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_xnor_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_xnor_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x04,0x82,0x8d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: s_xor_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x8d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: s_xor_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x8d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt
index 227e1c4..34a4646 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt
@@ -2,7 +2,7 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
 
 0xfe,0x4b,0x80,0xbe,0xd0,0xbc,0x8a,0x67,0x45,0x23,0x01,0x00
-# GFX1250: s_add_pc_i64 lit64(0x12345678abcd0)     ; encoding: [0xfe,0x4b,0x80,0xbe,0xd0,0xbc,0x8a,0x67,0x45,0x23,0x01,0x00]
+# GFX1250: s_add_pc_i64 0x12345678abcd0            ; encoding: [0xfe,0x4b,0x80,0xbe,0xd0,0xbc,0x8a,0x67,0x45,0x23,0x01,0x00]
 
 0xff,0x4b,0x80,0xbe,0x64,0x00,0x00,0x00
 # GFX1250: s_add_pc_i64 0x64                       ; encoding: [0xff,0x4b,0x80,0xbe,0x64,0x00,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_valu_lit64.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_valu_lit64.txt
index 1571fb9..cce6a74 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_valu_lit64.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_valu_lit64.txt
@@ -2,211 +2,211 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
 
 0xfe,0xfc,0xfd,0x05,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_add_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x05,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_add_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x05,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x30,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x30,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_ceil_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x30,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfe,0xff,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_class_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v255 ; encoding: [0xfe,0xfe,0xff,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_class_f64_e32 vcc_lo, 0x10abcdef12345678, v255 ; encoding: [0xfe,0xfe,0xff,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x45,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_eq_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_eq_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x4d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_ge_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_ge_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x49,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_gt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_gt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xa9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_gt_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_gt_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xb9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_gt_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_gt_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x47,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_le_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_le_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xa7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_le_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_le_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xb7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_le_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_le_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x4b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_lg_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_lg_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x43,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_lt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_lt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xa3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_lt_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_lt_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xb3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_lt_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_lt_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xab,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_ne_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_ne_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xbb,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_ne_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_ne_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x5b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_neq_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_neq_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x53,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_nge_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_nge_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x57,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_ngt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_ngt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x59,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_nle_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_nle_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x55,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_nlg_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_nlg_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x5d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_nlt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_nlt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x4f,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_o_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_o_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x51,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmp_u_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmp_u_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfe,0xff,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_class_f64_e32 lit64(0x10abcdef12345678), v255 ; encoding: [0xfe,0xfe,0xff,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_class_f64_e32 0x10abcdef12345678, v255 ; encoding: [0xfe,0xfe,0xff,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x45,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_eq_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_eq_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xa5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_eq_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_eq_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xb5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_eq_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_eq_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x4d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_ge_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_ge_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xad,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_ge_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xad,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_ge_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xad,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xbd,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_ge_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbd,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_ge_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbd,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x49,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_gt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_gt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xa9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_gt_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_gt_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xb9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_gt_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_gt_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x47,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_le_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_le_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xa7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_le_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_le_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xb7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_le_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_le_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x4b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_lg_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_lg_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x43,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_lt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_lt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xa3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_lt_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_lt_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xb3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_lt_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_lt_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xab,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_ne_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_ne_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xbb,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_ne_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_ne_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x5b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_neq_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_neq_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x53,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_nge_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_nge_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x57,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_ngt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_ngt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x59,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_nle_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_nle_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x55,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_nlg_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_nlg_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x5d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_nlt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_nlt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x4f,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_o_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_o_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0x51,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cmpx_u_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cmpx_u_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x1e,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cvt_f32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x1e,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cvt_f32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x1e,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x06,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cvt_i32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x06,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cvt_i32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x06,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x2a,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_cvt_u32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x2a,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_cvt_u32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x2a,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x34,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_floor_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x34,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_floor_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x34,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x7c,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_fract_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x7c,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_fract_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x7c,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x78,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_frexp_exp_i32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x78,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_frexp_exp_i32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x78,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x7a,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_frexp_mant_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x7a,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_frexp_mant_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x7a,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xfd,0x1d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_max_num_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_max_num_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xfd,0x1b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_min_num_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_min_num_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0xfc,0xfd,0x0d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_mul_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x0d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_mul_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x0d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x5e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_rcp_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x5e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_rcp_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x5e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x32,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_rndne_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x32,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_rndne_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x32,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x62,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_rsq_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x62,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_rsq_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x62,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x68,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_sqrt_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x68,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_sqrt_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x68,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x2e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10
-# GFX1250: v_trunc_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x2e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
+# GFX1250: v_trunc_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x2e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10]
 
 0xfe,0x30,0xfc,0x7f,0x33,0x33,0x33,0x33,0x33,0x23,0x63,0x40
-# GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x4063233333333333) ; encoding: [0xfe,0x30,0xfc,0x7f,0x33,0x33,0x33,0x33,0x33,0x23,0x63,0x40]
+# GFX1250: v_ceil_f64_e32 v[254:255], 0x4063233333333333 ; encoding: [0xfe,0x30,0xfc,0x7f,0x33,0x33,0x33,0x33,0x33,0x23,0x63,0x40]
 
 0xfe,0x30,0xfc,0x7f,0x5b,0xc0,0x74,0x89,0x36,0x69,0x89,0x44
-# GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x448969368974c05b) ; encoding: [0xfe,0x30,0xfc,0x7f,0x5b,0xc0,0x74,0x89,0x36,0x69,0x89,0x44]
+# GFX1250: v_ceil_f64_e32 v[254:255], 0x448969368974c05b ; encoding: [0xfe,0x30,0xfc,0x7f,0x5b,0xc0,0x74,0x89,0x36,0x69,0x89,0x44]
 
 0xff,0x30,0xfc,0x7f,0x00,0x20,0x63,0x40
 # GFX1250: v_ceil_f64_e32 v[254:255], 0x40632000   ; encoding: [0xff,0x30,0xfc,0x7f,0x00,0x20,0x63,0x40]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
index 94edf22..acf7ded 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt
@@ -3,7 +3,7 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
 
 0xff,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf
-# GFX1250: v_mov_b64_e32 v[254:255], lit64(0xaf123456) ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: v_mov_b64_e32 v[254:255], 0xaf123456    ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x3a,0x08,0x7e
 # GFX1250: v_mov_b64_e32 v[4:5], -1                ; encoding: [0xc1,0x3a,0x08,0x7e]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
index fb3f1b2..b117d7b0 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt
@@ -25,7 +25,7 @@
 
 0xe9,0x3e,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX1250-REAL16: v_tanh_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x3e,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00
 # GFX1250-REAL16: v_tanh_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00]
@@ -41,7 +41,7 @@
 
 0xe9,0x94,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX1250-REAL16: v_tanh_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x94,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xe9,0x96,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX1250: v_prng_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x96,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -66,7 +66,7 @@
 
 0xe9,0xf2,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX1250-REAL16: v_rcp_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf2,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xe9,0xf4,0xfe,0x7e,0x7f,0x00,0x00,0x00
 # GFX1250-REAL16: v_sqrt_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
@@ -82,7 +82,7 @@
 
 0xe9,0xf4,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX1250-REAL16: v_sqrt_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf4,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xe9,0xf6,0xfe,0x7e,0x7f,0x00,0x00,0x00
 # GFX1250-REAL16: v_rsq_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf6,0xfe,0x7e,0x7f,0x00,0x00,0x00]
@@ -98,7 +98,7 @@
 
 0xe9,0xf6,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX1250-REAL16: v_rsq_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf6,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xe9,0xf8,0xfe,0x7e,0x7f,0x00,0x00,0x00
 # GFX1250-REAL16: v_log_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf8,0xfe,0x7e,0x7f,0x00,0x00,0x00]
@@ -114,7 +114,7 @@
 
 0xe9,0xf8,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX1250-REAL16: v_log_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf8,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xe9,0xfa,0xfe,0x7e,0x7f,0x00,0x00,0x00
 # GFX1250-REAL16: v_exp_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfa,0xfe,0x7e,0x7f,0x00,0x00,0x00]
@@ -130,7 +130,7 @@
 
 0xe9,0xfa,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX1250-REAL16: v_exp_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfa,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xe9,0xfc,0xfe,0x7e,0x7f,0x00,0x00,0x00
 # GFX1250-REAL16: v_sin_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfc,0xfe,0x7e,0x7f,0x00,0x00,0x00]
@@ -146,7 +146,7 @@
 
 0xe9,0xfc,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX1250-REAL16: v_sin_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfc,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00
 # GFX1250-REAL16: v_cos_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00]
@@ -162,7 +162,7 @@
 
 0xe9,0xfe,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX1250-REAL16: v_cos_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00
 # GFX1250-REAL16: v_cvt_f32_bf16_dpp v127, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00]
@@ -186,7 +186,7 @@
 
 0xe9,0xf0,0x02,0x7f,0x02,0x77,0x39,0x05
 # GFX1250-REAL16: v_cvt_f16_bf8_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf0,0x02,0x7f,0x02,0x77,0x39,0x05]
-# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[2:3], v[187:188] ; encoding: [0x02,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[2:3], v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x02,0x77,0x39,0x05]
 
 0xea,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05
 # GFX1250-REAL16: v_cvt_f16_bf8_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05]
@@ -202,7 +202,7 @@
 
 0xe9,0xee,0x02,0x7f,0x02,0x77,0x39,0x05
 # GFX1250-REAL16: v_cvt_f16_fp8_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xee,0x02,0x7f,0x02,0x77,0x39,0x05]
-# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[2:3], v[187:188] ; encoding: [0x02,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[2:3], v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x02,0x77,0x39,0x05]
 
 0xea,0xec,0x02,0x7e,0x02,0x77,0x39,0x05
 # GFX1250-REAL16: v_cvt_pk_f16_bf8_dpp v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xec,0x02,0x7e,0x02,0x77,0x39,0x05]
@@ -230,7 +230,7 @@
 
 0xe9,0xe6,0x02,0x7f,0x02,0x77,0x39,0x05
 # GFX1250-REAL16: v_sat_pk4_i4_i8_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe6,0x02,0x7f,0x02,0x77,0x39,0x05]
-# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[2:3], v[187:188] ; encoding: [0x02,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[2:3], v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x02,0x77,0x39,0x05]
 
 0xe9,0xe8,0x02,0x7e,0x02,0x77,0x39,0x05
 # GFX1250-REAL16: v_sat_pk4_u4_u8_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe8,0x02,0x7e,0x02,0x77,0x39,0x05]
@@ -242,4 +242,4 @@
 
 0xe9,0xe8,0x02,0x7f,0x02,0x77,0x39,0x05
 # GFX1250-REAL16: v_sat_pk4_u4_u8_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe8,0x02,0x7f,0x02,0x77,0x39,0x05]
-# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[2:3], v[187:188] ; encoding: [0x02,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[2:3], v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x02,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt
index 130941c..58ac4e9 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt
@@ -146,7 +146,7 @@
 # GFX1250: v_add_nc_u64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x50,0x73,0x72,0x71,0x3f]
 
 0xff,0x08,0x08,0x50,0x56,0x34,0x12,0xaf
-# GFX1250: v_add_nc_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x50,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: v_add_nc_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x50,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x7e,0x08,0x08,0x50
 # GFX1250: v_add_nc_u64_e32 v[4:5], exec, v[4:5]   ; encoding: [0x7e,0x08,0x08,0x50]
@@ -233,7 +233,7 @@
 # GFX1250: v_sub_nc_u64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x52,0x73,0x72,0x71,0x3f]
 
 0xff,0x08,0x08,0x52,0x56,0x34,0x12,0xaf
-# GFX1250: v_sub_nc_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x52,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: v_sub_nc_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x52,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x7e,0x08,0x08,0x52
 # GFX1250: v_sub_nc_u64_e32 v[4:5], exec, v[4:5]   ; encoding: [0x7e,0x08,0x08,0x52]
@@ -320,7 +320,7 @@
 # GFX1250: v_mul_u64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x54,0x73,0x72,0x71,0x3f]
 
 0xff,0x08,0x08,0x54,0x56,0x34,0x12,0xaf
-# GFX1250: v_mul_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x54,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: v_mul_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x54,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x7e,0x08,0x08,0x54
 # GFX1250: v_mul_u64_e32 v[4:5], exec, v[4:5]      ; encoding: [0x7e,0x08,0x08,0x54]
@@ -377,13 +377,13 @@
 # GFX1250: v_fmaak_f64 v[254:255], 0x405ec000, v[2:3], 0x405ec000 ; encoding: [0xfe,0x04,0xfc,0x49,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
 
 0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40
-# GFX1250: v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit64(0x405ec00012345678) ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+# GFX1250: v_fmaak_f64 v[254:255], 0x405ec00012345678, v[254:255], 0x405ec00012345678 ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
 
 0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40
-# GFX1250: v_fmaak_f64 v[4:5], lit64(0x405ec66666666666), v[6:7], lit64(0x405ec66666666666) ; encoding: [0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+# GFX1250: v_fmaak_f64 v[4:5], 0x405ec66666666666, v[6:7], 0x405ec66666666666 ; encoding: [0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
 
 0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40
-# GFX1250: v_fmaak_f64 v[4:5], lit64(0x405ec66666666666), v[8:9], lit64(0x405ec66666666666) ; encoding: [0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+# GFX1250: v_fmaak_f64 v[4:5], 0x405ec66666666666, v[8:9], 0x405ec66666666666 ; encoding: [0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
 
 0xf2,0x10,0x08,0x48,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f
 # GFX1250: v_fmaak_f64 v[4:5], 1.0, v[8:9], 0x3ff00000 ; encoding: [0xf2,0x10,0x08,0x48,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f]
@@ -395,7 +395,7 @@
 # GFX1250: v_fmaak_f64 v[4:5], lit64(0x7e8), v[8:9], lit64(0x7e8) ; encoding: [0xfe,0x10,0x08,0x48,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00]
 
 0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40
-# GFX1250: v_fmaak_f64 v[4:5], v[2:3], v[2:3], lit64(0x405ec66666666666) ; encoding: [0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+# GFX1250: v_fmaak_f64 v[4:5], v[2:3], v[2:3], 0x405ec66666666666 ; encoding: [0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
 
 0xc1,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40
 # GFX1250: v_fmaak_f64 v[6:7], -1, v[8:9], 0x405ec000 ; encoding: [0xc1,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
@@ -410,7 +410,7 @@
 # GFX1250: v_fmaak_f64 v[6:7], null, v[8:9], 0x405ec000 ; encoding: [0x7c,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
 
 0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40
-# GFX1250: v_fmaak_f64 v[6:7], s[2:3], v[8:9], lit64(0x405ec00012345678) ; encoding: [0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+# GFX1250: v_fmaak_f64 v[6:7], s[2:3], v[8:9], 0x405ec00012345678 ; encoding: [0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
 
 0xfd,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40
 # GFX1250: v_fmaak_f64 v[6:7], src_scc, v[8:9], 0x405ec000 ; encoding: [0xfd,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
@@ -431,13 +431,13 @@
 # GFX1250: v_fmamk_f64 v[254:255], 0x405ec000, 0x405ec000, v[2:3] ; encoding: [0xfe,0x04,0xfc,0x47,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
 
 0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40
-# GFX1250: v_fmamk_f64 v[254:255], lit64(0x405ec00012345678), lit64(0x405ec00012345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+# GFX1250: v_fmamk_f64 v[254:255], 0x405ec00012345678, 0x405ec00012345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
 
 0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40
-# GFX1250: v_fmamk_f64 v[4:5], lit64(0x405ec66666666666), lit64(0x405ec66666666666), v[6:7] ; encoding: [0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+# GFX1250: v_fmamk_f64 v[4:5], 0x405ec66666666666, 0x405ec66666666666, v[6:7] ; encoding: [0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
 
 0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40
-# GFX1250: v_fmamk_f64 v[4:5], lit64(0x405ec66666666666), lit64(0x405ec66666666666), v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+# GFX1250: v_fmamk_f64 v[4:5], 0x405ec66666666666, 0x405ec66666666666, v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
 
 0xf2,0x0c,0x08,0x46,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f
 # GFX1250: v_fmamk_f64 v[4:5], 1.0, 0x3ff00000, v[6:7] ; encoding: [0xf2,0x0c,0x08,0x46,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f]
@@ -449,7 +449,7 @@
 # GFX1250: v_fmamk_f64 v[4:5], lit64(0x7e8), lit64(0x7e8), v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00]
 
 0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40
-# GFX1250: v_fmamk_f64 v[4:5], v[2:3], lit64(0x405ec66666666666), v[6:7] ; encoding: [0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
+# GFX1250: v_fmamk_f64 v[4:5], v[2:3], 0x405ec66666666666, v[6:7] ; encoding: [0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40]
 
 0xc1,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40
 # GFX1250: v_fmamk_f64 v[6:7], -1, 0x405ec000, v[2:3] ; encoding: [0xc1,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
@@ -464,7 +464,7 @@
 # GFX1250: v_fmamk_f64 v[6:7], null, 0x405ec000, v[2:3] ; encoding: [0x7c,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
 
 0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40
-# GFX1250: v_fmamk_f64 v[6:7], s[2:3], lit64(0x405ec00012345678), v[2:3] ; encoding: [0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
+# GFX1250: v_fmamk_f64 v[6:7], s[2:3], 0x405ec00012345678, v[2:3] ; encoding: [0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40]
 
 0xfd,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40
 # GFX1250: v_fmamk_f64 v[6:7], src_scc, 0x405ec000, v[2:3] ; encoding: [0xfd,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
index c88fbc2..06ef877 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
@@ -868,7 +868,7 @@
 
 0xff,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_and_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_not0_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x2d,0x80,0xbe
 # GFX12: s_and_not0_saveexec_b64 s[0:1], -1      ; encoding: [0xc1,0x2d,0x80,0xbe]
@@ -959,7 +959,7 @@
 
 0xff,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_and_not0_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_not0_wrexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_not0_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x35,0x80,0xbe
 # GFX12: s_and_not0_wrexec_b64 s[0:1], -1        ; encoding: [0xc1,0x35,0x80,0xbe]
@@ -1050,7 +1050,7 @@
 
 0xff,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_and_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_not1_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x31,0x80,0xbe
 # GFX12: s_and_not1_saveexec_b64 s[0:1], -1      ; encoding: [0xc1,0x31,0x80,0xbe]
@@ -1141,7 +1141,7 @@
 
 0xff,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_and_not1_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_not1_wrexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_not1_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x37,0x80,0xbe
 # GFX12: s_and_not1_wrexec_b64 s[0:1], -1        ; encoding: [0xc1,0x37,0x80,0xbe]
@@ -1232,7 +1232,7 @@
 
 0xff,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_and_saveexec_b64 s[0:1], 0xaf123456   ; encoding: [0xff,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_saveexec_b64 s[0:1], 0xaf123456   ; encoding: [0xfe,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x21,0x80,0xbe
 # GFX12: s_and_saveexec_b64 s[0:1], -1           ; encoding: [0xc1,0x21,0x80,0xbe]
@@ -1341,7 +1341,7 @@
 
 0xff,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_bcnt0_i32_b64 s0, 0xaf123456          ; encoding: [0xff,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_bcnt0_i32_b64 s0, lit64(0xaf123456)   ; encoding: [0xfe,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_bcnt0_i32_b64 s0, 0xaf123456          ; encoding: [0xfe,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x17,0x80,0xbe
 # GFX12: s_bcnt0_i32_b64 s0, -1                  ; encoding: [0xc1,0x17,0x80,0xbe]
@@ -1453,7 +1453,7 @@
 
 0xff,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_bcnt1_i32_b64 s0, 0xaf123456          ; encoding: [0xff,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_bcnt1_i32_b64 s0, lit64(0xaf123456)   ; encoding: [0xfe,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_bcnt1_i32_b64 s0, 0xaf123456          ; encoding: [0xfe,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x19,0x80,0xbe
 # GFX12: s_bcnt1_i32_b64 s0, -1                  ; encoding: [0xc1,0x19,0x80,0xbe]
@@ -1832,7 +1832,7 @@
 
 0xff,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_brev_b64 s[0:1], 0xaf123456           ; encoding: [0xff,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_brev_b64 s[0:1], lit64(0xaf123456)    ; encoding: [0xfe,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_brev_b64 s[0:1], 0xaf123456           ; encoding: [0xfe,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x05,0x80,0xbe
 # GFX12: s_brev_b64 s[0:1], -1                   ; encoding: [0xc1,0x05,0x80,0xbe]
@@ -1887,7 +1887,7 @@
 
 0xff,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_cls_i32_i64 s0, 0xaf123456            ; encoding: [0xff,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_cls_i32_i64 s0, lit64(0xaf123456)     ; encoding: [0xfe,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_cls_i32_i64 s0, 0xaf123456            ; encoding: [0xfe,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x0d,0x80,0xbe
 # GFX12: s_cls_i32_i64 s0, -1                    ; encoding: [0xc1,0x0d,0x80,0xbe]
@@ -2053,7 +2053,7 @@
 
 0xff,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_clz_i32_u64 s0, 0xaf123456            ; encoding: [0xff,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_clz_i32_u64 s0, lit64(0xaf123456)     ; encoding: [0xfe,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_clz_i32_u64 s0, 0xaf123456            ; encoding: [0xfe,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x0b,0x80,0xbe
 # GFX12: s_clz_i32_u64 s0, -1                    ; encoding: [0xc1,0x0b,0x80,0xbe]
@@ -2159,7 +2159,7 @@
 
 0xff,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_cmov_b64 s[0:1], 0xaf123456           ; encoding: [0xff,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_cmov_b64 s[0:1], lit64(0xaf123456)    ; encoding: [0xfe,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_cmov_b64 s[0:1], 0xaf123456           ; encoding: [0xfe,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x03,0x80,0xbe
 # GFX12: s_cmov_b64 s[0:1], -1                   ; encoding: [0xc1,0x03,0x80,0xbe]
@@ -2268,7 +2268,7 @@
 
 0xff,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_ctz_i32_b64 s0, 0xaf123456            ; encoding: [0xff,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_ctz_i32_b64 s0, lit64(0xaf123456)     ; encoding: [0xfe,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_ctz_i32_b64 s0, 0xaf123456            ; encoding: [0xfe,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x09,0x80,0xbe
 # GFX12: s_ctz_i32_b64 s0, -1                    ; encoding: [0xc1,0x09,0x80,0xbe]
@@ -2396,7 +2396,7 @@
 
 0xff,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_mov_b64 s[0:1], 0xaf123456            ; encoding: [0xff,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_mov_b64 s[0:1], lit64(0xaf123456)     ; encoding: [0xfe,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_mov_b64 s[0:1], 0xaf123456            ; encoding: [0xfe,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x01,0x80,0xbe
 # GFX12: s_mov_b64 s[0:1], -1                    ; encoding: [0xc1,0x01,0x80,0xbe]
@@ -2493,7 +2493,7 @@
 
 0xff,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_movreld_b64 s[0:1], 0xaf123456        ; encoding: [0xff,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_movreld_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_movreld_b64 s[0:1], 0xaf123456        ; encoding: [0xfe,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x43,0x80,0xbe
 # GFX12: s_movreld_b64 s[0:1], -1                ; encoding: [0xc1,0x43,0x80,0xbe]
@@ -2662,7 +2662,7 @@
 
 0xff,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_nand_saveexec_b64 s[0:1], 0xaf123456  ; encoding: [0xff,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_nand_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_nand_saveexec_b64 s[0:1], 0xaf123456  ; encoding: [0xfe,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x27,0x80,0xbe
 # GFX12: s_nand_saveexec_b64 s[0:1], -1          ; encoding: [0xc1,0x27,0x80,0xbe]
@@ -2753,7 +2753,7 @@
 
 0xff,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_nor_saveexec_b64 s[0:1], 0xaf123456   ; encoding: [0xff,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_nor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_nor_saveexec_b64 s[0:1], 0xaf123456   ; encoding: [0xfe,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x29,0x80,0xbe
 # GFX12: s_nor_saveexec_b64 s[0:1], -1           ; encoding: [0xc1,0x29,0x80,0xbe]
@@ -2856,7 +2856,7 @@
 
 0xff,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_not_b64 s[0:1], 0xaf123456            ; encoding: [0xff,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_not_b64 s[0:1], lit64(0xaf123456)     ; encoding: [0xfe,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_not_b64 s[0:1], 0xaf123456            ; encoding: [0xfe,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x1f,0x80,0xbe
 # GFX12: s_not_b64 s[0:1], -1                    ; encoding: [0xc1,0x1f,0x80,0xbe]
@@ -2947,7 +2947,7 @@
 
 0xff,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_or_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_or_not0_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_or_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x2f,0x80,0xbe
 # GFX12: s_or_not0_saveexec_b64 s[0:1], -1       ; encoding: [0xc1,0x2f,0x80,0xbe]
@@ -3038,7 +3038,7 @@
 
 0xff,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_or_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_or_not1_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_or_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x33,0x80,0xbe
 # GFX12: s_or_not1_saveexec_b64 s[0:1], -1       ; encoding: [0xc1,0x33,0x80,0xbe]
@@ -3129,7 +3129,7 @@
 
 0xff,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_or_saveexec_b64 s[0:1], 0xaf123456    ; encoding: [0xff,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_or_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_or_saveexec_b64 s[0:1], 0xaf123456    ; encoding: [0xfe,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x23,0x80,0xbe
 # GFX12: s_or_saveexec_b64 s[0:1], -1            ; encoding: [0xc1,0x23,0x80,0xbe]
@@ -3232,7 +3232,7 @@
 
 0xff,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_quadmask_b64 s[0:1], 0xaf123456       ; encoding: [0xff,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_quadmask_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_quadmask_b64 s[0:1], 0xaf123456       ; encoding: [0xfe,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x1b,0x80,0xbe
 # GFX12: s_quadmask_b64 s[0:1], -1               ; encoding: [0xc1,0x1b,0x80,0xbe]
@@ -3549,7 +3549,7 @@
 
 0xff,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_wqm_b64 s[0:1], 0xaf123456            ; encoding: [0xff,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_wqm_b64 s[0:1], lit64(0xaf123456)     ; encoding: [0xfe,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_wqm_b64 s[0:1], 0xaf123456            ; encoding: [0xfe,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x1d,0x80,0xbe
 # GFX12: s_wqm_b64 s[0:1], -1                    ; encoding: [0xc1,0x1d,0x80,0xbe]
@@ -3640,7 +3640,7 @@
 
 0xff,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_xnor_saveexec_b64 s[0:1], 0xaf123456  ; encoding: [0xff,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_xnor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_xnor_saveexec_b64 s[0:1], 0xaf123456  ; encoding: [0xfe,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x2b,0x80,0xbe
 # GFX12: s_xnor_saveexec_b64 s[0:1], -1          ; encoding: [0xc1,0x2b,0x80,0xbe]
@@ -3731,7 +3731,7 @@
 
 0xff,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf
 # GFX1200: s_xor_saveexec_b64 s[0:1], 0xaf123456   ; encoding: [0xff,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf]
-# GFX1250: s_xor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_xor_saveexec_b64 s[0:1], 0xaf123456   ; encoding: [0xfe,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x25,0x80,0xbe
 # GFX12: s_xor_saveexec_b64 s[0:1], -1           ; encoding: [0xc1,0x25,0x80,0xbe]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt
index d889931..47b7408 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt
@@ -56,7 +56,7 @@
 
 0xff,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf
 # GFX1200: s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf]
-# GFX1250: s_add_nc_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x02,0x7e,0x80,0xa9
 # GFX12: s_add_nc_u64 s[0:1], s[2:3], exec       ; encoding: [0x02,0x7e,0x80,0xa9]
@@ -81,7 +81,7 @@
 
 0x02,0xff,0x80,0xa9,0x56,0x34,0x12,0xaf
 # GFX1200: s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0xa9,0x56,0x34,0x12,0xaf]
-# GFX1250: s_add_nc_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x02,0x04,0x00,0xaa
 # GFX12: s_sub_nc_u64 s[0:1], s[2:3], s[4:5]     ; encoding: [0x02,0x04,0x00,0xaa]
@@ -136,7 +136,7 @@
 
 0xff,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf
 # GFX1200: s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf]
-# GFX1250: s_sub_nc_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x02,0x7e,0x00,0xaa
 # GFX12: s_sub_nc_u64 s[0:1], s[2:3], exec       ; encoding: [0x02,0x7e,0x00,0xaa]
@@ -161,7 +161,7 @@
 
 0x02,0xff,0x00,0xaa,0x56,0x34,0x12,0xaf
 # GFX1200: s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x00,0xaa,0x56,0x34,0x12,0xaf]
-# GFX1250: s_sub_nc_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x02,0x04,0x80,0xaa
 # GFX12: s_mul_u64 s[0:1], s[2:3], s[4:5]        ; encoding: [0x02,0x04,0x80,0xaa]
@@ -216,7 +216,7 @@
 
 0xff,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf
 # GFX1200: s_mul_u64 s[0:1], 0xaf123456, s[2:3]    ; encoding: [0xff,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf]
-# GFX1250: s_mul_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_mul_u64 s[0:1], 0xaf123456, s[2:3]    ; encoding: [0xfe,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x02,0x7e,0x80,0xaa
 # GFX12: s_mul_u64 s[0:1], s[2:3], exec          ; encoding: [0x02,0x7e,0x80,0xaa]
@@ -241,7 +241,7 @@
 
 0x02,0xff,0x80,0xaa,0x56,0x34,0x12,0xaf
 # GFX1200: s_mul_u64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xff,0x80,0xaa,0x56,0x34,0x12,0xaf]
-# GFX1250: s_mul_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_mul_u64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xfe,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x01,0x02,0x05,0xa0
 # GFX12: s_add_f32 s5, s1, s2                    ; encoding: [0x01,0x02,0x05,0xa0]
@@ -1697,7 +1697,7 @@
 
 0xff,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf
 # GFX1200: s_and_b64 s[0:1], 0xaf123456, s[4:5]    ; encoding: [0xff,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_b64 s[0:1], 0xaf123456, s[4:5]    ; encoding: [0xfe,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x04,0x80,0x8b
 # GFX12: s_and_b64 s[0:1], -1, s[4:5]            ; encoding: [0xc1,0x04,0x80,0x8b]
@@ -1725,7 +1725,7 @@
 
 0x02,0xff,0x80,0x8b,0x56,0x34,0x12,0xaf
 # GFX1200: s_and_b64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xff,0x80,0x8b,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_b64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xfe,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x02,0xc1,0x80,0x8b
 # GFX12: s_and_b64 s[0:1], s[2:3], -1            ; encoding: [0x02,0xc1,0x80,0x8b]
@@ -1882,7 +1882,7 @@
 
 0xff,0x04,0x80,0x91,0x56,0x34,0x12,0xaf
 # GFX1200: s_and_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x91,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_not1_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x04,0x80,0x91
 # GFX12: s_and_not1_b64 s[0:1], -1, s[4:5]       ; encoding: [0xc1,0x04,0x80,0x91]
@@ -1910,7 +1910,7 @@
 
 0x02,0xff,0x80,0x91,0x56,0x34,0x12,0xaf
 # GFX1200: s_and_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x91,0x56,0x34,0x12,0xaf]
-# GFX1250: s_and_not1_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_and_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x02,0xc1,0x80,0x91
 # GFX12: s_and_not1_b64 s[0:1], s[2:3], -1       ; encoding: [0x02,0xc1,0x80,0x91]
@@ -2067,7 +2067,7 @@
 
 0xff,0x04,0x80,0x86,0x56,0x34,0x12,0xaf
 # GFX1200: s_ashr_i64 s[0:1], 0xaf123456, s4       ; encoding: [0xff,0x04,0x80,0x86,0x56,0x34,0x12,0xaf]
-# GFX1250: s_ashr_i64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x86,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_ashr_i64 s[0:1], 0xaf123456, s4       ; encoding: [0xfe,0x04,0x80,0x86,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x04,0x80,0x86
 # GFX12: s_ashr_i64 s[0:1], -1, s4               ; encoding: [0xc1,0x04,0x80,0x86]
@@ -2251,7 +2251,7 @@
 
 0xff,0x04,0x80,0x94,0x56,0x34,0x12,0xaf
 # GFX1200: s_bfe_i64 s[0:1], 0xaf123456, s4        ; encoding: [0xff,0x04,0x80,0x94,0x56,0x34,0x12,0xaf]
-# GFX1250: s_bfe_i64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_bfe_i64 s[0:1], 0xaf123456, s4        ; encoding: [0xfe,0x04,0x80,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x04,0x80,0x94
 # GFX12: s_bfe_i64 s[0:1], -1, s4                ; encoding: [0xc1,0x04,0x80,0x94]
@@ -2435,7 +2435,7 @@
 
 0xff,0x04,0x00,0x94,0x56,0x34,0x12,0xaf
 # GFX1200: s_bfe_u64 s[0:1], 0xaf123456, s4        ; encoding: [0xff,0x04,0x00,0x94,0x56,0x34,0x12,0xaf]
-# GFX1250: s_bfe_u64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x00,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_bfe_u64 s[0:1], 0xaf123456, s4        ; encoding: [0xfe,0x04,0x00,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x04,0x00,0x94
 # GFX12: s_bfe_u64 s[0:1], -1, s4                ; encoding: [0xc1,0x04,0x00,0x94]
@@ -2820,7 +2820,7 @@
 
 0xff,0x04,0x80,0x98,0x56,0x34,0x12,0xaf
 # GFX1200: s_cselect_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x98,0x56,0x34,0x12,0xaf]
-# GFX1250: s_cselect_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_cselect_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x04,0x80,0x98
 # GFX12: s_cselect_b64 s[0:1], -1, s[4:5]        ; encoding: [0xc1,0x04,0x80,0x98]
@@ -2848,7 +2848,7 @@
 
 0x02,0xff,0x80,0x98,0x56,0x34,0x12,0xaf
 # GFX1200: s_cselect_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x98,0x56,0x34,0x12,0xaf]
-# GFX1250: s_cselect_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_cselect_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x02,0xc1,0x80,0x98
 # GFX12: s_cselect_b64 s[0:1], s[2:3], -1        ; encoding: [0x02,0xc1,0x80,0x98]
@@ -3425,7 +3425,7 @@
 
 0xff,0x04,0x80,0x84,0x56,0x34,0x12,0xaf
 # GFX1200: s_lshl_b64 s[0:1], 0xaf123456, s4       ; encoding: [0xff,0x04,0x80,0x84,0x56,0x34,0x12,0xaf]
-# GFX1250: s_lshl_b64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x84,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_lshl_b64 s[0:1], 0xaf123456, s4       ; encoding: [0xfe,0x04,0x80,0x84,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x04,0x80,0x84
 # GFX12: s_lshl_b64 s[0:1], -1, s4               ; encoding: [0xc1,0x04,0x80,0x84]
@@ -3609,7 +3609,7 @@
 
 0xff,0x04,0x80,0x85,0x56,0x34,0x12,0xaf
 # GFX1200: s_lshr_b64 s[0:1], 0xaf123456, s4       ; encoding: [0xff,0x04,0x80,0x85,0x56,0x34,0x12,0xaf]
-# GFX1250: s_lshr_b64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x85,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_lshr_b64 s[0:1], 0xaf123456, s4       ; encoding: [0xfe,0x04,0x80,0x85,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x04,0x80,0x85
 # GFX12: s_lshr_b64 s[0:1], -1, s4               ; encoding: [0xc1,0x04,0x80,0x85]
@@ -4528,7 +4528,7 @@
 
 0xff,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf
 # GFX1200: s_nand_b64 s[0:1], 0xaf123456, s[4:5]   ; encoding: [0xff,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf]
-# GFX1250: s_nand_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_nand_b64 s[0:1], 0xaf123456, s[4:5]   ; encoding: [0xfe,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x04,0x80,0x8e
 # GFX12: s_nand_b64 s[0:1], -1, s[4:5]           ; encoding: [0xc1,0x04,0x80,0x8e]
@@ -4556,7 +4556,7 @@
 
 0x02,0xff,0x80,0x8e,0x56,0x34,0x12,0xaf
 # GFX1200: s_nand_b64 s[0:1], s[2:3], 0xaf123456   ; encoding: [0x02,0xff,0x80,0x8e,0x56,0x34,0x12,0xaf]
-# GFX1250: s_nand_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_nand_b64 s[0:1], s[2:3], 0xaf123456   ; encoding: [0x02,0xfe,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x02,0xc1,0x80,0x8e
 # GFX12: s_nand_b64 s[0:1], s[2:3], -1           ; encoding: [0x02,0xc1,0x80,0x8e]
@@ -4713,7 +4713,7 @@
 
 0xff,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf
 # GFX1200: s_nor_b64 s[0:1], 0xaf123456, s[4:5]    ; encoding: [0xff,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf]
-# GFX1250: s_nor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_nor_b64 s[0:1], 0xaf123456, s[4:5]    ; encoding: [0xfe,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x04,0x80,0x8f
 # GFX12: s_nor_b64 s[0:1], -1, s[4:5]            ; encoding: [0xc1,0x04,0x80,0x8f]
@@ -4741,7 +4741,7 @@
 
 0x02,0xff,0x80,0x8f,0x56,0x34,0x12,0xaf
 # GFX1200: s_nor_b64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xff,0x80,0x8f,0x56,0x34,0x12,0xaf]
-# GFX1250: s_nor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_nor_b64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xfe,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x02,0xc1,0x80,0x8f
 # GFX12: s_nor_b64 s[0:1], s[2:3], -1            ; encoding: [0x02,0xc1,0x80,0x8f]
@@ -4898,7 +4898,7 @@
 
 0xff,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf
 # GFX1200: s_or_b64 s[0:1], 0xaf123456, s[4:5]     ; encoding: [0xff,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf]
-# GFX1250: s_or_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_or_b64 s[0:1], 0xaf123456, s[4:5]     ; encoding: [0xfe,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x04,0x80,0x8c
 # GFX12: s_or_b64 s[0:1], -1, s[4:5]             ; encoding: [0xc1,0x04,0x80,0x8c]
@@ -4926,7 +4926,7 @@
 
 0x02,0xff,0x80,0x8c,0x56,0x34,0x12,0xaf
 # GFX1200: s_or_b64 s[0:1], s[2:3], 0xaf123456     ; encoding: [0x02,0xff,0x80,0x8c,0x56,0x34,0x12,0xaf]
-# GFX1250: s_or_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_or_b64 s[0:1], s[2:3], 0xaf123456     ; encoding: [0x02,0xfe,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x02,0xc1,0x80,0x8c
 # GFX12: s_or_b64 s[0:1], s[2:3], -1             ; encoding: [0x02,0xc1,0x80,0x8c]
@@ -5083,7 +5083,7 @@
 
 0xff,0x04,0x80,0x92,0x56,0x34,0x12,0xaf
 # GFX1200: s_or_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x92,0x56,0x34,0x12,0xaf]
-# GFX1250: s_or_not1_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_or_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x04,0x80,0x92
 # GFX12: s_or_not1_b64 s[0:1], -1, s[4:5]        ; encoding: [0xc1,0x04,0x80,0x92]
@@ -5111,7 +5111,7 @@
 
 0x02,0xff,0x80,0x92,0x56,0x34,0x12,0xaf
 # GFX1200: s_or_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x92,0x56,0x34,0x12,0xaf]
-# GFX1250: s_or_not1_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_or_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x02,0xc1,0x80,0x92
 # GFX12: s_or_not1_b64 s[0:1], s[2:3], -1        ; encoding: [0x02,0xc1,0x80,0x92]
@@ -5898,7 +5898,7 @@
 
 0xff,0x04,0x80,0x90,0x56,0x34,0x12,0xaf
 # GFX1200: s_xnor_b64 s[0:1], 0xaf123456, s[4:5]   ; encoding: [0xff,0x04,0x80,0x90,0x56,0x34,0x12,0xaf]
-# GFX1250: s_xnor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_xnor_b64 s[0:1], 0xaf123456, s[4:5]   ; encoding: [0xfe,0x04,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x04,0x80,0x90
 # GFX12: s_xnor_b64 s[0:1], -1, s[4:5]           ; encoding: [0xc1,0x04,0x80,0x90]
@@ -5926,7 +5926,7 @@
 
 0x02,0xff,0x80,0x90,0x56,0x34,0x12,0xaf
 # GFX1200: s_xnor_b64 s[0:1], s[2:3], 0xaf123456   ; encoding: [0x02,0xff,0x80,0x90,0x56,0x34,0x12,0xaf]
-# GFX1250: s_xnor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_xnor_b64 s[0:1], s[2:3], 0xaf123456   ; encoding: [0x02,0xfe,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x02,0xc1,0x80,0x90
 # GFX12: s_xnor_b64 s[0:1], s[2:3], -1           ; encoding: [0x02,0xc1,0x80,0x90]
@@ -6083,7 +6083,7 @@
 
 0xff,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf
 # GFX1200: s_xor_b64 s[0:1], 0xaf123456, s[4:5]    ; encoding: [0xff,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf]
-# GFX1250: s_xor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_xor_b64 s[0:1], 0xaf123456, s[4:5]    ; encoding: [0xfe,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0xc1,0x04,0x80,0x8d
 # GFX12: s_xor_b64 s[0:1], -1, s[4:5]            ; encoding: [0xc1,0x04,0x80,0x8d]
@@ -6111,7 +6111,7 @@
 
 0x02,0xff,0x80,0x8d,0x56,0x34,0x12,0xaf
 # GFX1200: s_xor_b64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xff,0x80,0x8d,0x56,0x34,0x12,0xaf]
-# GFX1250: s_xor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_xor_b64 s[0:1], s[2:3], 0xaf123456    ; encoding: [0x02,0xfe,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x02,0xc1,0x80,0x8d
 # GFX12: s_xor_b64 s[0:1], s[2:3], -1            ; encoding: [0x02,0xc1,0x80,0x8d]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopc.txt
index a8da16f..9355582 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopc.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopc.txt
@@ -1492,7 +1492,7 @@
 
 0x00,0xff,0x10,0xbf,0x56,0x34,0x12,0xaf
 # GFX1200: s_cmp_eq_u64 s[0:1], 0xaf123456         ; encoding: [0x00,0xff,0x10,0xbf,0x56,0x34,0x12,0xaf]
-# GFX1250: s_cmp_eq_u64 s[0:1], lit64(0xaf123456)  ; encoding: [0x00,0xfe,0x10,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_cmp_eq_u64 s[0:1], 0xaf123456         ; encoding: [0x00,0xfe,0x10,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x00,0xc1,0x10,0xbf
 # GFX12: s_cmp_eq_u64 s[0:1], -1                 ; encoding: [0x00,0xc1,0x10,0xbf]
@@ -2015,7 +2015,7 @@
 
 0x00,0xff,0x11,0xbf,0x56,0x34,0x12,0xaf
 # GFX1200: s_cmp_lg_u64 s[0:1], 0xaf123456         ; encoding: [0x00,0xff,0x11,0xbf,0x56,0x34,0x12,0xaf]
-# GFX1250: s_cmp_lg_u64 s[0:1], lit64(0xaf123456)  ; encoding: [0x00,0xfe,0x11,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
+# GFX1250: s_cmp_lg_u64 s[0:1], 0xaf123456         ; encoding: [0x00,0xfe,0x11,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00]
 
 0x00,0xc1,0x11,0xbf
 # GFX12: s_cmp_lg_u64 s[0:1], -1                 ; encoding: [0x00,0xc1,0x11,0xbf]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt
index 7a7be57..d6a176e 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt
@@ -1,10 +1,10 @@
 # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-REAL16 %s
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX1200-FAKE16 %s
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-REAL16 %s
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX1200-FAKE16 %s
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-REAL16 %s
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX1250-FAKE16 %s
 
 0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05
 # GFX12: v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05]
@@ -22,7 +22,8 @@
 
 0xe9,0xb8,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_ceil_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb8,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xb8,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_ceil_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xb8,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -55,7 +56,8 @@
 
 0xe9,0xc2,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_cos_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc2,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xc2,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_cos_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc2,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -94,7 +96,8 @@
 
 0xe9,0x14,0x0a,0x7f,0x01,0x77,0x39,0x05
 # GFX12-REAL16: v_cvt_f16_f32_dpp v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x14,0x0a,0x7f,0x01,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[1:2], v[187:188] ; encoding: [0x01,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[1:2], v[187:188] ; encoding: [0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[1:2]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x01,0x77,0x39,0x05]
 
 0xea,0x14,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_cvt_f16_f32_dpp v127.h, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x14,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -109,7 +112,8 @@
 
 0xe9,0xa2,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_cvt_f16_i16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xa2,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xa2,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_cvt_f16_i16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xa2,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -124,7 +128,8 @@
 
 0xe9,0xa0,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_cvt_f16_u16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xa0,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xa0,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_cvt_f16_u16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xa0,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -197,7 +202,8 @@
 
 0xe9,0xa6,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_cvt_i16_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xa6,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xa6,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_cvt_i16_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xa6,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -240,7 +246,8 @@
 
 0xe9,0xc6,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_cvt_norm_i16_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc6,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xc6,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_cvt_norm_i16_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc6,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -255,7 +262,8 @@
 
 0xe9,0xc8,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_cvt_norm_u16_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc8,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xc8,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_cvt_norm_u16_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc8,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -276,7 +284,8 @@
 
 0xe9,0xa4,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_cvt_u16_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xa4,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xa4,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_cvt_u16_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xa4,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -313,7 +322,8 @@
 
 0xe9,0xb0,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_exp_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb0,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xb0,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_exp_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xb0,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -334,7 +344,8 @@
 
 0xe9,0xb6,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_floor_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb6,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xb6,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_floor_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xb6,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -355,7 +366,8 @@
 
 0xe9,0xbe,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_fract_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xbe,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xbe,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_fract_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xbe,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -376,7 +388,8 @@
 
 0xe9,0xb4,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_frexp_exp_i16_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb4,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xb4,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_frexp_exp_i16_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xb4,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -397,7 +410,8 @@
 
 0xe9,0xb2,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_frexp_mant_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb2,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xb2,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_frexp_mant_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xb2,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -418,7 +432,8 @@
 
 0xe9,0xae,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_log_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xae,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xae,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_log_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xae,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -469,7 +484,8 @@
 
 0xe9,0xd2,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_not_b16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd2,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xd2,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_not_b16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xd2,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -491,7 +507,8 @@
 
 0xe9,0xa8,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_rcp_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xa8,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xa8,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_rcp_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xa8,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -518,7 +535,8 @@
 
 0xe9,0xbc,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_rndne_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xbc,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xbc,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_rndne_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xbc,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -539,7 +557,8 @@
 
 0xe9,0xac,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_rsq_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xac,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xac,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_rsq_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xac,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -560,7 +579,8 @@
 
 0xe9,0xc4,0x0a,0x7f,0x01,0x77,0x39,0x05
 # GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0x0a,0x7f,0x01,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[1:2], v[187:188] ; encoding: [0x01,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[1:2], v[187:188] ; encoding: [0x01,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[1:2]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x01,0x77,0x39,0x05]
 
 0xea,0xc4,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_sat_pk_u8_i16_dpp v127.h, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc4,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -575,7 +595,8 @@
 
 0xe9,0xc0,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_sin_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc0,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xc0,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_sin_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc0,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -596,7 +617,8 @@
 
 0xe9,0xaa,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_sqrt_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xaa,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xaa,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_sqrt_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xaa,0xfe,0x7f,0xff,0x00,0x00,0x00]
@@ -617,7 +639,8 @@
 
 0xe9,0xba,0x0a,0x7f,0x81,0x77,0x39,0x05
 # GFX12-REAL16: v_trunc_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xba,0x0a,0x7f,0x81,0x77,0x39,0x05]
-# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05]
+# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05]
 
 0xea,0xba,0xfe,0x7f,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_trunc_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xba,0xfe,0x7f,0xff,0x00,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
index 802d6368..60f058d 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
@@ -19311,6 +19311,27 @@
 # CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 clamp ; encoding: [0x05,0x80,0x77,0xd2,0x00,0x04,0x0e,0x04]
 0x05,0x80,0x77,0xd2,0x00,0x04,0x0e,0x04
 
+# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04]
+0x05,0x40,0x77,0xd2,0x00,0x04,0x0e,0x04
+
+# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x77,0xd2,0x00,0x04,0x0e,0x04]
+0x05,0x20,0x77,0xd2,0x00,0x04,0x0e,0x04
+
+# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[0,0,1,1] ; encoding: [0x05,0x60,0x77,0xd2,0x00,0x04,0x0e,0x04]
+0x05,0x60,0x77,0xd2,0x00,0x04,0x0e,0x04
+
+# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x77,0xd2,0x00,0x04,0x0e,0x04]
+0x05,0x08,0x77,0xd2,0x00,0x04,0x0e,0x04
+
+# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,0,1] ; encoding: [0x05,0x48,0x77,0xd2,0x00,0x04,0x0e,0x04]
+0x05,0x48,0x77,0xd2,0x00,0x04,0x0e,0x04
+
+# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,0] ; encoding: [0x05,0x28,0x77,0xd2,0x00,0x04,0x0e,0x04]
+0x05,0x28,0x77,0xd2,0x00,0x04,0x0e,0x04
+
+# CHECK: v_interp_p2_f16 v5, v2, attr0.x, v3 op_sel:[1,0,1,1] ; encoding: [0x05,0x68,0x77,0xd2,0x00,0x04,0x0e,0x04]
+0x05,0x68,0x77,0xd2,0x00,0x04,0x0e,0x04
+
 # CHECK: v_add_f64 v[5:6], v[1:2], v[2:3]        ; encoding: [0x05,0x00,0x80,0xd2,0x01,0x05,0x02,0x00]
 0x05,0x00,0x80,0xd2,0x01,0x05,0x02,0x00
 
diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s
index b8cd6de..111616d 100644
--- a/llvm/test/MC/RISCV/attribute-arch.s
+++ b/llvm/test/MC/RISCV/attribute-arch.s
@@ -420,8 +420,8 @@
 .attribute arch, "rv32ia_zacas1p0"
 # CHECK: attribute      5, "rv32i2p1_a2p1_zaamo1p0_zacas1p0_zalrsc1p0"
 
-.attribute arch, "rv32izalasr0p1"
-# CHECK: attribute      5, "rv32i2p1_zalasr0p1"
+.attribute arch, "rv32izalasr0p9"
+# CHECK: attribute      5, "rv32i2p1_zalasr0p9"
 
 .attribute arch, "rv32i_xcvalu"
 # CHECK: attribute      5, "rv32i2p1_xcvalu1p0"
diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter-calling-conv.td b/llvm/test/TableGen/RuntimeLibcallEmitter-calling-conv.td
index c224cd6..98a376b 100644
--- a/llvm/test/TableGen/RuntimeLibcallEmitter-calling-conv.td
+++ b/llvm/test/TableGen/RuntimeLibcallEmitter-calling-conv.td
@@ -48,47 +48,79 @@ def MSP430LibraryWithCondCC : SystemRuntimeLibrary<isMSP430,
 // CHECK-NEXT:     Entry = DefaultCC;
 // CHECK-NEXT:   }
 // CHECK-EMPTY:
-// CHECK-NEXT:    setLibcallsImpl({
-// CHECK-NEXT:      {RTLIB::MALLOC, RTLIB::impl_malloc}, // malloc
-// CHECK-NEXT:    });
+// CHECK-NEXT:    static const LibcallImplPair LibraryCalls[] = {
+// CHECK-NEXT:        {RTLIB::MALLOC, RTLIB::impl_malloc}, // malloc
+// CHECK-NEXT:    };
+// CHECK-EMPTY:
+// CHECK-NEXT:    for (const auto [Func, Impl] : LibraryCalls) {
+// CHECK-NEXT:      setLibcallImpl(Func, Impl);
+// CHECK-NEXT:    }
 // CHECK-EMPTY:
-// CHECK-NEXT:    setLibcallsImpl({
+// CHECK-NEXT:    static const LibcallImplPair LibraryCalls_AlwaysAvailable_AVR_BUILTIN[] = {
 // CHECK-NEXT:        {RTLIB::SDIVREM_I8, RTLIB::impl___divmodqi4}, // __divmodqi4
 // CHECK-NEXT:        {RTLIB::UDIVREM_I16, RTLIB::impl___udivmodhi4}, // __udivmodhi4
-// CHECK-NEXT:    }, CallingConv::AVR_BUILTIN);
+// CHECK-NEXT:    };
+// CHECK-EMPTY:
+// CHECK-NEXT:    for (const auto [Func, Impl] : LibraryCalls_AlwaysAvailable_AVR_BUILTIN) {
+// CHECK-NEXT:      setLibcallImpl(Func, Impl);
+// CHECK-NEXT:      setLibcallImplCallingConv(Impl, CallingConv::AVR_BUILTIN);
+// CHECK-NEXT:    }
 // CHECK-EMPTY:
 // CHECK-NEXT:    return;
 // CHECK-NEXT:  }
 // CHECK-EMPTY:
 // CHECK-NEXT: if (TT.getArch() == Triple::avr) {
-// CHECK-NEXT:   setLibcallsImpl({
+// CHECK-NEXT:   static const LibcallImplPair LibraryCalls[] = {
 // CHECK-NEXT:       {RTLIB::MALLOC, RTLIB::impl_malloc}, // malloc
-// CHECK-NEXT:   });
+// CHECK-NEXT:   };
+// CHECK-EMPTY:
+// CHECK-NEXT:   for (const auto [Func, Impl] : LibraryCalls) {
+// CHECK-NEXT:     setLibcallImpl(Func, Impl);
+// CHECK-NEXT:   }
 // CHECK-EMPTY:
-// CHECK-NEXT:   setLibcallsImpl({
+// CHECK-NEXT:   static const LibcallImplPair LibraryCalls_AlwaysAvailable_AVR_BUILTIN[] = {
 // CHECK-NEXT:       {RTLIB::SDIVREM_I8, RTLIB::impl___divmodqi4}, // __divmodqi4
 // CHECK-NEXT:       {RTLIB::UDIVREM_I16, RTLIB::impl___udivmodhi4}, // __udivmodhi4
-// CHECK-NEXT:   }, CallingConv::AVR_BUILTIN);
+// CHECK-NEXT:   };
+// CHECK-EMPTY:
+// CHECK-NEXT:   for (const auto [Func, Impl] : LibraryCalls_AlwaysAvailable_AVR_BUILTIN) {
+// CHECK-NEXT:     setLibcallImpl(Func, Impl);
+// CHECK-NEXT:     setLibcallImplCallingConv(Impl, CallingConv::AVR_BUILTIN);
+// CHECK-NEXT:   }
 // CHECK-EMPTY:
 // CHECK-NEXT:   return;
 // CHECK-NEXT:  }
 // CHECK-EMPTY:
 // CHECK-NEXT:  if (TT.getArch() == Triple::msp430) {
-// CHECK-NEXT:    setLibcallsImpl({
+// CHECK-NEXT:    static const LibcallImplPair LibraryCalls[] = {
 // CHECK-NEXT:        {RTLIB::MALLOC, RTLIB::impl_malloc}, // malloc
-// CHECK-NEXT:    });
+// CHECK-NEXT:    };
+// CHECK-EMPTY:
+// CHECK-NEXT:    for (const auto [Func, Impl] : LibraryCalls) {
+// CHECK-NEXT:      setLibcallImpl(Func, Impl);
+// CHECK-NEXT:    }
 // CHECK-EMPTY:
 // CHECK-NEXT:    if ( isFoo() ) {
-// CHECK-NEXT:      setLibcallsImpl({
-// CHECK-NEXT:        {RTLIB::SDIVREM_I8, RTLIB::impl___divmodqi4}, // __divmodqi4
-// CHECK-NEXT:      }, CallingConv::AVR_BUILTIN);
+// CHECK-NEXT:      static const LibcallImplPair LibraryCalls_anonymous_3_AVR_BUILTIN[] = {
+// CHECK-NEXT:          {RTLIB::SDIVREM_I8, RTLIB::impl___divmodqi4}, // __divmodqi4
+// CHECK-NEXT:      };
+// CHECK-EMPTY:
+// CHECK-NEXT:      for (const auto [Func, Impl] : LibraryCalls_anonymous_3_AVR_BUILTIN) {
+// CHECK-NEXT:        setLibcallImpl(Func, Impl);
+// CHECK-NEXT:        setLibcallImplCallingConv(Impl, CallingConv::AVR_BUILTIN);
+// CHECK-NEXT:      }
 // CHECK-EMPTY:
 // CHECK-NEXT:    }
 // CHECK-EMPTY:
 // CHECK-NEXT:    if ( isBar() ) {
-// CHECK-NEXT:      setLibcallsImpl({
+// CHECK-NEXT:      static const LibcallImplPair LibraryCalls_anonymous_5_MSP430_BUILTIN[] = {
 // CHECK-NEXT:          {RTLIB::UDIVREM_I16, RTLIB::impl___udivmodhi4}, // __udivmodhi4
-// CHECK-NEXT:      }, CallingConv::MSP430_BUILTIN);
+// CHECK-NEXT:      };
+// CHECK-EMPTY:
+// CHECK-NEXT:      for (const auto [Func, Impl] : LibraryCalls_anonymous_5_MSP430_BUILTIN) {
+// CHECK-NEXT:        setLibcallImpl(Func, Impl);
+// CHECK-NEXT:        setLibcallImplCallingConv(Impl, CallingConv::MSP430_BUILTIN);
+// CHECK-NEXT:      }
 // CHECK-EMPTY:
 // CHECK-NEXT:    }
 // CHECK-EMPTY:
diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter-conflict-warning.td b/llvm/test/TableGen/RuntimeLibcallEmitter-conflict-warning.td
index 8169f56..136c81b 100644
--- a/llvm/test/TableGen/RuntimeLibcallEmitter-conflict-warning.td
+++ b/llvm/test/TableGen/RuntimeLibcallEmitter-conflict-warning.td
@@ -25,9 +25,9 @@ def dup1 : RuntimeLibcallImpl<ANOTHER_DUP>;
 // func_a and func_b both provide SOME_FUNC.
 
 // CHECK: if (isTargetArchA()) {
-// CHECK-NEXT: setLibcallsImpl({
+// CHECK-NEXT: static const LibcallImplPair LibraryCalls[] = {
 // CHECK-NEXT:   {RTLIB::SOME_FUNC, RTLIB::impl_func_b}, // func_b
-// CHECK-NEXT: });
+// CHECK-NEXT: };
 
 // ERR: :[[@LINE+1]]:5: warning: conflicting implementations for libcall SOME_FUNC: func_b, func_a
 def TheSystemLibraryA : SystemRuntimeLibrary<isTargetArchA,
@@ -35,10 +35,10 @@ def TheSystemLibraryA : SystemRuntimeLibrary<isTargetArchA,
 >;
 
 // CHECK: if (isTargetArchB()) {
-// CHECK-NEXT: setLibcallsImpl({
+// CHECK-NEXT: static const LibcallImplPair LibraryCalls[] = {
 // CHECK-NEXT:   {RTLIB::OTHER_FUNC, RTLIB::impl_other_func}, // other_func
-// CHECK-NEXT:   {RTLIB::SOME_FUNC, RTLIB::impl_func_a}, // func_a
-// CHECK-NEXT: });
+// CHECK-NEXT:   {RTLIB::SOME_FUNC, RTLIB::impl_func_a}, // func_a
+// CHECK-NEXT: };
 
 // ERR: :[[@LINE+1]]:5: warning: conflicting implementations for libcall SOME_FUNC: func_a, func_b
 def TheSystemLibraryB : SystemRuntimeLibrary<isTargetArchB,
@@ -46,11 +46,11 @@ def TheSystemLibraryB : SystemRuntimeLibrary<isTargetArchB,
 >;
 
 // CHECK: if (isTargetArchC()) {
-// CHECK-NEXT: setLibcallsImpl({
+// CHECK-NEXT: static const LibcallImplPair LibraryCalls[] = {
 // CHECK-NEXT:   {RTLIB::ANOTHER_DUP, RTLIB::impl_dup1}, // dup1
 // CHECK-NEXT:   {RTLIB::OTHER_FUNC, RTLIB::impl_other_func}, // other_func
 // CHECK-NEXT:   {RTLIB::SOME_FUNC, RTLIB::impl_func_a}, // func_a
-// CHECK-NEXT: });
+// CHECK-NEXT: };
 
 // ERR: :[[@LINE+3]]:5: warning: conflicting implementations for libcall ANOTHER_DUP: dup1, dup0
 // ERR: :[[@LINE+2]]:5: warning: conflicting implementations for libcall SOME_FUNC: func_a, func_b
diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter.td b/llvm/test/TableGen/RuntimeLibcallEmitter.td
index 78705e2..c336fee 100644
--- a/llvm/test/TableGen/RuntimeLibcallEmitter.td
+++ b/llvm/test/TableGen/RuntimeLibcallEmitter.td
@@ -194,36 +194,38 @@ def BlahLibrary : SystemRuntimeLibrary<isBlahArch, (add calloc, LibraryWithCondi
 // CHECK-NEXT:    RTLIB::Libcall Func;
 // CHECK-NEXT:    RTLIB::LibcallImpl Impl;
 // CHECK-NEXT:  };
-// CHECK-NEXT:  auto setLibcallsImpl = [this](
-// CHECK-NEXT:    ArrayRef<LibcallImplPair> Libcalls,
-// CHECK-NEXT:    std::optional<llvm::CallingConv::ID> CC = {})
-// CHECK-NEXT:  {
-// CHECK-NEXT:    for (const auto [Func, Impl] : Libcalls) {
-// CHECK-NEXT:      setLibcallImpl(Func, Impl);
-// CHECK-NEXT:      if (CC)
-// CHECK-NEXT:        setLibcallImplCallingConv(Impl, *CC);
-// CHECK-NEXT:    }
-// CHECK-NEXT:  };
 // CHECK-EMPTY:
 // CHECK-NEXT: if (TT.getArch() == Triple::blah) {
-// CHECK-NEXT:     setLibcallsImpl({
+// CHECK-NEXT:     static const LibcallImplPair LibraryCalls[] = {
 // CHECK-NEXT:         {RTLIB::BZERO, RTLIB::impl_bzero}, // bzero
 // CHECK-NEXT:         {RTLIB::CALLOC, RTLIB::impl_calloc}, // calloc
 // CHECK-NEXT:         {RTLIB::SQRT_F128, RTLIB::impl_sqrtl_f128}, // sqrtl
-// CHECK-NEXT:     });
+// CHECK-NEXT:     };
+// CHECK-EMPTY:
+// CHECK-NEXT:     for (const auto [Func, Impl] : LibraryCalls) {
+// CHECK-NEXT:       setLibcallImpl(Func, Impl);
+// CHECK-NEXT:     }
 // CHECK-EMPTY:
 // CHECK-NEXT:    if (TT.hasCompilerRT()) {
-// CHECK-NEXT:      setLibcallsImpl({
+// CHECK-NEXT:      static const LibcallImplPair LibraryCalls_hasCompilerRT[] = {
 // CHECK-NEXT:          {RTLIB::SHL_I32, RTLIB::impl___ashlsi3}, // __ashlsi3
 // CHECK-NEXT:          {RTLIB::SRL_I64, RTLIB::impl___lshrdi3}, // __lshrdi3
-// CHECK-NEXT:      });
+// CHECK-NEXT:      };
+// CHECK-EMPTY:
+// CHECK-NEXT:      for (const auto [Func, Impl] : LibraryCalls_hasCompilerRT) {
+// CHECK-NEXT:        setLibcallImpl(Func, Impl);
+// CHECK-NEXT:      }
 // CHECK-EMPTY:
 // CHECK-NEXT:    }
 // CHECK-EMPTY:
 // CHECK-NEXT:    if (TT.getOS() == Triple::bar) {
-// CHECK-NEXT:      setLibcallsImpl({
+// CHECK-NEXT:      static const LibcallImplPair LibraryCalls_isBarOS[] = {
 // CHECK-NEXT:          {RTLIB::MEMSET, RTLIB::impl____memset}, // ___memset
-// CHECK-NEXT:      });
+// CHECK-NEXT:      };
+// CHECK-EMPTY:
+// CHECK-NEXT:      for (const auto [Func, Impl] : LibraryCalls_isBarOS) {
+// CHECK-NEXT:        setLibcallImpl(Func, Impl);
+// CHECK-NEXT:      }
 // CHECK-EMPTY:
 // CHECK-NEXT:    }
 // CHECK-EMPTY:
@@ -231,25 +233,37 @@ def BlahLibrary : SystemRuntimeLibrary<isBlahArch, (add calloc, LibraryWithCondi
 // CHECK-NEXT: }
 // CHECK-EMPTY:
 // CHECK-NEXT: if (TT.getArch() == Triple::buzz) {
-// CHECK-NEXT:    setLibcallsImpl({
+// CHECK-NEXT:    static const LibcallImplPair LibraryCalls[] = {
 // CHECK-NEXT:        {RTLIB::SHL_I32, RTLIB::impl___ashlsi3}, // __ashlsi3
 // CHECK-NEXT:        {RTLIB::SQRT_F80, RTLIB::impl_sqrtl_f80}, // sqrtl
 // CHECK-NEXT:        {RTLIB::SRL_I64, RTLIB::impl___lshrdi3}, // __lshrdi3
-// CHECK-NEXT:    });
+// CHECK-NEXT:    };
+// CHECK-EMPTY:
+// CHECK-NEXT:    for (const auto [Func, Impl] : LibraryCalls) {
+// CHECK-NEXT:      setLibcallImpl(Func, Impl);
+// CHECK-NEXT:    }
 // CHECK-EMPTY:
 // CHECK-NEXT:   return;
 // CHECK-NEXT: }
 // CHECK-EMPTY:
 // CHECK-NEXT: if (TT.getArch() == Triple::foo) {
-// CHECK-NEXT:    setLibcallsImpl({
+// CHECK-NEXT:    static const LibcallImplPair LibraryCalls[] = {
 // CHECK-NEXT:        {RTLIB::BZERO, RTLIB::impl_bzero}, // bzero
 // CHECK-NEXT:        {RTLIB::SQRT_F128, RTLIB::impl_sqrtl_f128}, // sqrtl
-// CHECK-NEXT:    });
+// CHECK-NEXT:    };
+// CHECK-EMPTY:
+// CHECK-NEXT:    for (const auto [Func, Impl] : LibraryCalls) {
+// CHECK-NEXT:      setLibcallImpl(Func, Impl);
+// CHECK-NEXT:    }
 // CHECK-EMPTY:
 // CHECK-NEXT:    if (TT.getOS() == Triple::bar) {
-// CHECK-NEXT:      setLibcallsImpl({
+// CHECK-NEXT:      static const LibcallImplPair LibraryCalls_isBarOS[] = {
 // CHECK-NEXT:          {RTLIB::MEMSET, RTLIB::impl____memset}, // ___memset
-// CHECK-NEXT:      });
+// CHECK-NEXT:      };
+// CHECK-EMPTY:
+// CHECK-NEXT:      for (const auto [Func, Impl] : LibraryCalls_isBarOS) {
+// CHECK-NEXT:        setLibcallImpl(Func, Impl);
+// CHECK-NEXT:      }
 // CHECK-EMPTY:
 // CHECK-NEXT:    }
 // CHECK-EMPTY:
@@ -257,12 +271,16 @@ def BlahLibrary : SystemRuntimeLibrary<isBlahArch, (add calloc, LibraryWithCondi
 // CHECK-NEXT:  }
 // CHECK-EMPTY:
 // CHECK-NEXT: if (TT.getArch() == Triple::simple) {
-// CHECK-NEXT:    setLibcallsImpl({
+// CHECK-NEXT:    static const LibcallImplPair LibraryCalls[] = {
 // CHECK-NEXT:        {RTLIB::CALLOC, RTLIB::impl_calloc}, // calloc
 // CHECK-NEXT:        {RTLIB::SHL_I32, RTLIB::impl___ashlsi3}, // __ashlsi3
 // CHECK-NEXT:        {RTLIB::SQRT_F80, RTLIB::impl_sqrtl_f80}, // sqrtl
 // CHECK-NEXT:        {RTLIB::SRL_I64, RTLIB::impl___lshrdi3}, // __lshrdi3
-// CHECK-NEXT:    });
+// CHECK-NEXT:    };
+// CHECK-EMPTY:
+// CHECK-NEXT:    for (const auto [Func, Impl] : LibraryCalls) {
+// CHECK-NEXT:      setLibcallImpl(Func, Impl);
+// CHECK-NEXT:    }
 // CHECK-EMPTY:
 // CHECK-NEXT:   return;
 // CHECK-NEXT: }
diff --git a/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll b/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll
index 3a306a4..ccef61d 100644
--- a/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll
+++ b/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll
@@ -12,7 +12,7 @@ target triple = "sparcv9-unknown-unknown"
 define i8 @test_cmpxchg_i8(ptr %arg, i8 %old, i8 %new) {
 ; CHECK-LABEL: @test_cmpxchg_i8(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    fence release
 ; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4)
 ; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64
 ; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP0]], 3
@@ -45,7 +45,7 @@ define i8 @test_cmpxchg_i8(ptr %arg, i8 %old, i8 %new) {
 ; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertvalue { i8, i1 } poison, i8 [[EXTRACTED]], 0
 ; CHECK-NEXT:    [[TMP18:%.*]] = insertvalue { i8, i1 } [[TMP17]], i1 [[TMP14]], 1
-; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    fence acquire
 ; CHECK-NEXT:    [[RET:%.*]] = extractvalue { i8, i1 } [[TMP18]], 0
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
@@ -58,7 +58,7 @@ entry:
 define i16 @test_cmpxchg_i16(ptr %arg, i16 %old, i16 %new) {
 ; CHECK-LABEL: @test_cmpxchg_i16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    fence release
 ; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4)
 ; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64
 ; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP0]], 3
@@ -91,7 +91,7 @@ define i16 @test_cmpxchg_i16(ptr %arg, i16 %old, i16 %new) {
 ; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
 ; CHECK-NEXT:    [[TMP17:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0
 ; CHECK-NEXT:    [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1
-; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    fence acquire
 ; CHECK-NEXT:    [[RET:%.*]] = extractvalue { i16, i1 } [[TMP18]], 0
 ; CHECK-NEXT:    ret i16 [[RET]]
 ;
@@ -104,7 +104,7 @@ entry:
 define i16 @test_add_i16(ptr %arg, i16 %val) {
 ; CHECK-LABEL: @test_add_i16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    fence release
 ; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4)
 ; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64
 ; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP0]], 3
@@ -130,7 +130,7 @@ define i16 @test_add_i16(ptr %arg, i16 %val) {
 ; CHECK:       atomicrmw.end:
 ; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
 ; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
-; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    fence acquire
 ; CHECK-NEXT:    ret i16 [[EXTRACTED]]
 ;
 entry:
@@ -141,7 +141,7 @@ entry:
 define i16 @test_xor_i16(ptr %arg, i16 %val) {
 ; CHECK-LABEL: @test_xor_i16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    fence release
 ; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4)
 ; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64
 ; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP0]], 3
@@ -164,7 +164,7 @@ define i16 @test_xor_i16(ptr %arg, i16 %val) {
 ; CHECK:       atomicrmw.end:
 ; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
 ; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
-; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    fence acquire
 ; CHECK-NEXT:    ret i16 [[EXTRACTED]]
 ;
 entry:
@@ -175,7 +175,7 @@ entry:
 define i16 @test_or_i16(ptr %arg, i16 %val) {
 ; CHECK-LABEL: @test_or_i16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    fence release
 ; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4)
 ; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64
 ; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP0]], 3
@@ -198,7 +198,7 @@ define i16 @test_or_i16(ptr %arg, i16 %val) {
 ; CHECK:       atomicrmw.end:
 ; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
 ; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
-; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    fence acquire
 ; CHECK-NEXT:    ret i16 [[EXTRACTED]]
 ;
 entry:
@@ -209,7 +209,7 @@ entry:
 define i16 @test_and_i16(ptr %arg, i16 %val) {
 ; CHECK-LABEL: @test_and_i16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    fence release
 ; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4)
 ; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64
 ; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP0]], 3
@@ -233,7 +233,7 @@ define i16 @test_and_i16(ptr %arg, i16 %val) {
 ; CHECK:       atomicrmw.end:
 ; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
 ; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
-; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    fence acquire
 ; CHECK-NEXT:    ret i16 [[EXTRACTED]]
 ;
 entry:
@@ -244,7 +244,7 @@ entry:
 define i16 @test_min_i16(ptr %arg, i16 %val) {
 ; CHECK-LABEL: @test_min_i16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    fence release
 ; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4)
 ; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64
 ; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP0]], 3
@@ -272,7 +272,7 @@ define i16 @test_min_i16(ptr %arg, i16 %val) {
 ; CHECK:       atomicrmw.end:
 ; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
 ; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
-; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    fence acquire
 ; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
 ;
 entry:
@@ -282,7 +282,7 @@ entry:
 
 define half @test_atomicrmw_fadd_f16(ptr %ptr, half %value) {
 ; CHECK-LABEL: @test_atomicrmw_fadd_f16(
-; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    fence release
 ; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
 ; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
 ; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
@@ -312,7 +312,7 @@ define half @test_atomicrmw_fadd_f16(ptr %ptr, half %value) {
 ; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
 ; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
 ; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16 [[EXTRACTED3]] to half
-; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    fence acquire
 ; CHECK-NEXT:    ret half [[TMP8]]
 ;
   %res = atomicrmw fadd ptr %ptr, half %value seq_cst
diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll
index e7b7dff..4173c32 100644
--- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll
+++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll
@@ -1,11 +1,12 @@
 ; REQUIRES: asserts
 ; RUN: opt -S -passes=dfa-jump-threading -debug-only=dfa-jump-threading -disable-output %s 2>&1 | FileCheck %s
+; RUN: opt -S -passes=dfa-jump-threading -print-prof-data %s -o - | FileCheck %s --check-prefix=PROFILE
 
 ; This test checks that the analysis identifies all threadable paths in a
 ; simple CFG. A threadable path includes a list of basic blocks, the exit
 ; state, and the block that determines the next state.
 ; < path of BBs that form a cycle > [ state, determinator ]
-define i32 @test1(i32 %num) {
+define i32 @test1(i32 %num) !prof !0 {
 ; CHECK: < case2 for.inc for.body > [ 1, for.inc ]
 ; CHECK-NEXT: < for.inc for.body > [ 1, for.inc ]
 ; CHECK-NEXT: < case1 for.inc for.body > [ 2, for.inc ]
@@ -25,8 +26,11 @@ case1:
   br label %for.inc
 
 case2:
+  ; PROFILE-LABEL: @test1
+  ; PROFILE-LABEL: case2:
+  ; PROFILE: br i1 %cmp, label %for.inc.jt1, label %sel.si.unfold.false.jt2, !prof !1 ; !1 = !{!"branch_weights", i32 3, i32 5}
   %cmp = icmp eq i32 %count, 50
-  %sel = select i1 %cmp, i32 1, i32 2
+  %sel = select i1 %cmp, i32 1, i32 2, !prof !1
   br label %for.inc
 
 for.inc:
@@ -182,7 +186,7 @@ bb66:                                             ; preds = %bb59
 }
 
 ; Value %init is not predictable but it's okay since it is the value initial to the switch.
-define i32 @initial.value.positive1(i32 %init) {
+define i32 @initial.value.positive1(i32 %init) !prof !0 {
 ; CHECK: < loop.1.backedge loop.1 loop.2 loop.3 > [ 1, loop.1 ]
 ; CHECK-NEXT: < case4 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 2, loop.1.backedge ]
 ; CHECK-NEXT: < case2 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 4, loop.1.backedge ]
@@ -241,3 +245,6 @@ infloop.i:
 exit:
   ret i32 0
 }
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 3, i32 5}
diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll
index ad05684..092c854 100644
--- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll
+++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s
 
 ; These tests check that the DFA jump threading transformation is applied
@@ -301,7 +301,7 @@ end:
   ret void
 }
 
-define void @pr106083_invalidBBarg_fold(i1 %cmp1, i1 %cmp2, i1 %not, ptr %d) {
+define void @pr106083_invalidBBarg_fold(i1 %cmp1, i1 %cmp2, i1 %not, ptr %d) !prof !0 {
 ; CHECK-LABEL: @pr106083_invalidBBarg_fold(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    br label [[BB1:%.*]]
@@ -310,7 +310,7 @@ define void @pr106083_invalidBBarg_fold(i1 %cmp1, i1 %cmp2, i1 %not, ptr %d) {
 ; CHECK-NEXT:    br i1 [[NOT:%.*]], label [[BB7_JT0]], label [[BB2:%.*]]
 ; CHECK:       BB2:
 ; CHECK-NEXT:    store i16 0, ptr [[D:%.*]], align 2
-; CHECK-NEXT:    br i1 [[CMP2:%.*]], label [[BB7:%.*]], label [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0:%.*]]
+; CHECK-NEXT:    br i1 [[CMP2:%.*]], label [[BB7:%.*]], label [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0:%.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:       spec.select.si.unfold.false:
 ; CHECK-NEXT:    br label [[BB9]]
 ; CHECK:       spec.select.si.unfold.false.jt0:
@@ -357,7 +357,7 @@ BB1:                                              ; preds = %BB1.backedge, %BB7,
 
 BB2:                                              ; preds = %BB1
   store i16 0, ptr %d, align 2
-  %spec.select = select i1 %cmp2, i32 %sel, i32 0
+  %spec.select = select i1 %cmp2, i32 %sel, i32 0, !prof !1
   br label %BB7
 
 BB7:                                              ; preds = %BB2, %BB1
@@ -444,3 +444,10 @@ select.unfold:                                    ; preds = %bb1, %.loopexit6
 bb2:                                              ; preds = %select.unfold
   unreachable
 }
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 3, i32 5}
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 5}
+;.
diff --git a/llvm/test/Transforms/GVN/2011-07-07-MatchIntrinsicExtract.ll b/llvm/test/Transforms/GVN/2011-07-07-MatchIntrinsicExtract.ll
index b139e07..acd0317 100644
--- a/llvm/test/Transforms/GVN/2011-07-07-MatchIntrinsicExtract.ll
+++ b/llvm/test/Transforms/GVN/2011-07-07-MatchIntrinsicExtract.ll
@@ -1,9 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn -S | FileCheck %s
-;
 
 %0 = type { i64, i1 }
 
 define i64 @test1(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: define i64 @test1(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[TMP0]] poison, i64 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertvalue [[TMP0]] [[TMP2]], i1 [[TMP3]], 1
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
 entry:
   %uadd = tail call %0 @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
   %uadd.0 = extractvalue %0 %uadd, 0
@@ -11,11 +21,17 @@ entry:
   ret i64 %add1
 }
 
-; CHECK-LABEL: @test1(
-; CHECK-NOT: add1
-; CHECK: ret
-
 define i64 @test2(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: define i64 @test2(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[TMP0]] poison, i64 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertvalue [[TMP0]] [[TMP2]], i1 [[TMP3]], 1
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
 entry:
   %usub = tail call %0 @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
   %usub.0 = extractvalue %0 %usub, 0
@@ -23,11 +39,17 @@ entry:
   ret i64 %sub1
 }
 
-; CHECK-LABEL: @test2(
-; CHECK-NOT: sub1
-; CHECK: ret
-
 define i64 @test3(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: define i64 @test3(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[TMP0]] poison, i64 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertvalue [[TMP0]] [[TMP2]], i1 [[TMP3]], 1
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
 entry:
   %umul = tail call %0 @llvm.umul.with.overflow.i64(i64 %a, i64 %b)
   %umul.0 = extractvalue %0 %umul, 0
@@ -35,11 +57,17 @@ entry:
   ret i64 %mul1
 }
 
-; CHECK-LABEL: @test3(
-; CHECK-NOT: mul1
-; CHECK: ret
-
 define i64 @test4(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: define i64 @test4(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[TMP0]] poison, i64 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertvalue [[TMP0]] [[TMP2]], i1 [[TMP3]], 1
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
 entry:
   %sadd = tail call %0 @llvm.sadd.with.overflow.i64(i64 %a, i64 %b)
   %sadd.0 = extractvalue %0 %sadd, 0
@@ -47,11 +75,17 @@ entry:
   ret i64 %add1
 }
 
-; CHECK-LABEL: @test4(
-; CHECK-NOT: add1
-; CHECK: ret
-
 define i64 @test5(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: define i64 @test5(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[TMP0]] poison, i64 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertvalue [[TMP0]] [[TMP2]], i1 [[TMP3]], 1
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
 entry:
   %ssub = tail call %0 @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
   %ssub.0 = extractvalue %0 %ssub, 0
@@ -59,11 +93,17 @@ entry:
   ret i64 %sub1
 }
 
-; CHECK-LABEL: @test5(
-; CHECK-NOT: sub1
-; CHECK: ret
-
 define i64 @test6(i64 %a, i64 %b) nounwind ssp {
+; CHECK-LABEL: define i64 @test6(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[TMP0]] poison, i64 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertvalue [[TMP0]] [[TMP2]], i1 [[TMP3]], 1
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
 entry:
   %smul = tail call %0 @llvm.smul.with.overflow.i64(i64 %a, i64 %b)
   %smul.0 = extractvalue %0 %smul, 0
@@ -71,10 +111,6 @@ entry:
   ret i64 %mul1
 }
 
-; CHECK-LABEL: @test6(
-; CHECK-NOT: mul1
-; CHECK: ret
-
 declare void @exit(i32) noreturn
 declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
 declare %0 @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
@@ -82,4 +118,3 @@ declare %0 @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
 declare %0 @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
 declare %0 @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
 declare %0 @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
-
diff --git a/llvm/test/Transforms/GVN/2011-09-07-TypeIdFor.ll b/llvm/test/Transforms/GVN/2011-09-07-TypeIdFor.ll
index 01cc3164..52e6a8e 100644
--- a/llvm/test/Transforms/GVN/2011-09-07-TypeIdFor.ll
+++ b/llvm/test/Transforms/GVN/2011-09-07-TypeIdFor.ll
@@ -1,4 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn -S | FileCheck %s
+
 %struct.__fundamental_type_info_pseudo = type { %struct.__type_info_pseudo }
 %struct.__type_info_pseudo = type { ptr, ptr }
 
@@ -18,26 +20,70 @@ declare void @__cxa_end_catch()
 declare i32 @__gxx_personality_v0(i32, i64, ptr, ptr)
 
 define void @_Z3foov() uwtable personality ptr @__gxx_personality_v0 {
+; CHECK-LABEL: define void @_Z3foov(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] personality ptr @__gxx_personality_v0 {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    invoke void @_Z4barv()
+; CHECK-NEXT:            to label %[[RETURN:.*]] unwind label %[[LPAD:.*]]
+; CHECK:       [[LPAD]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            catch ptr @_ZTIi
+; CHECK-NEXT:            catch ptr @_ZTIb
+; CHECK-NEXT:            catch ptr @_ZTIi
+; CHECK-NEXT:            catch ptr @_ZTIb
+; CHECK-NEXT:    [[EXC_PTR2_I:%.*]] = extractvalue { ptr, i32 } [[TMP0]], 0
+; CHECK-NEXT:    [[FILTER3_I:%.*]] = extractvalue { ptr, i32 } [[TMP0]], 1
+; CHECK-NEXT:    [[TYPEID_I:%.*]] = tail call i32 @llvm.eh.typeid.for.p0(ptr @_ZTIi)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[FILTER3_I]], [[TYPEID_I]]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PPAD:.*]], label %[[NEXT:.*]]
+; CHECK:       [[NEXT]]:
+; CHECK-NEXT:    [[TYPEID1_I:%.*]] = tail call i32 @llvm.eh.typeid.for.p0(ptr @_ZTIb)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[FILTER3_I]], [[TYPEID1_I]]
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[PPAD2:.*]], label %[[NEXT2:.*]]
+; CHECK:       [[PPAD]]:
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[EXC_PTR2_I]]) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT:    tail call void @__cxa_end_catch() #[[ATTR0]]
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[PPAD2]]:
+; CHECK-NEXT:    [[D_2073_5_I:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[EXC_PTR2_I]]) #[[ATTR0]]
+; CHECK-NEXT:    tail call void @__cxa_end_catch() #[[ATTR0]]
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[NEXT2]]:
+; CHECK-NEXT:    call void @_Z7cleanupv()
+; CHECK-NEXT:    br i1 false, label %[[PPAD3:.*]], label %[[NEXT3:.*]]
+; CHECK:       [[NEXT3]]:
+; CHECK-NEXT:    br i1 false, label %[[PPAD4:.*]], label %[[UNWIND:.*]]
+; CHECK:       [[UNWIND]]:
+; CHECK-NEXT:    resume { ptr, i32 } [[TMP0]]
+; CHECK:       [[PPAD3]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[EXC_PTR2_I]]) #[[ATTR0]]
+; CHECK-NEXT:    tail call void @__cxa_end_catch() #[[ATTR0]]
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[PPAD4]]:
+; CHECK-NEXT:    [[D_2080_5:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[EXC_PTR2_I]]) #[[ATTR0]]
+; CHECK-NEXT:    tail call void @__cxa_end_catch() #[[ATTR0]]
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   invoke void @_Z4barv()
-          to label %return unwind label %lpad
+  to label %return unwind label %lpad
 
 lpad:                                             ; preds = %entry
   %0 = landingpad { ptr, i32 }
-          catch ptr @_ZTIi
-          catch ptr @_ZTIb
-          catch ptr @_ZTIi
-          catch ptr @_ZTIb
+  catch ptr @_ZTIi
+  catch ptr @_ZTIb
+  catch ptr @_ZTIi
+  catch ptr @_ZTIb
   %exc_ptr2.i = extractvalue { ptr, i32 } %0, 0
   %filter3.i = extractvalue { ptr, i32 } %0, 1
   %typeid.i = tail call i32 @llvm.eh.typeid.for(ptr @_ZTIi)
-; CHECK: call i32 @llvm.eh.typeid.for
   %1 = icmp eq i32 %filter3.i, %typeid.i
   br i1 %1, label %ppad, label %next
 
 next:                                             ; preds = %lpad
   %typeid1.i = tail call i32 @llvm.eh.typeid.for(ptr @_ZTIb)
-; CHECK: call i32 @llvm.eh.typeid.for
   %2 = icmp eq i32 %filter3.i, %typeid1.i
   br i1 %2, label %ppad2, label %next2
 
@@ -54,7 +100,6 @@ ppad2:                                            ; preds = %next
 next2:                                            ; preds = %next
   call void @_Z7cleanupv()
   %typeid = tail call i32 @llvm.eh.typeid.for(ptr @_ZTIi)
-; CHECK-NOT: call i32 @llvm.eh.typeid.for
   %4 = icmp eq i32 %filter3.i, %typeid
   br i1 %4, label %ppad3, label %next3
 
diff --git a/llvm/test/Transforms/GVN/2012-05-22-PreCrash.ll b/llvm/test/Transforms/GVN/2012-05-22-PreCrash.ll
index 28b7178..205dff7 100644
--- a/llvm/test/Transforms/GVN/2012-05-22-PreCrash.ll
+++ b/llvm/test/Transforms/GVN/2012-05-22-PreCrash.ll
@@ -1,7 +1,35 @@
-; RUN: opt < %s -passes=gvn
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=gvn -S | FileCheck %s
+
 ; PR12858
 
 define void @fn5(i16 signext %p1, i8 signext %p2, i1 %arg) nounwind uwtable {
+; CHECK-LABEL: define void @fn5(
+; CHECK-SAME: i16 signext [[P1:%.*]], i8 signext [[P2:%.*]], i1 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 [[ARG]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    [[DOTPRE:%.*]] = sext i16 [[P1]] to i32
+; CHECK-NEXT:    br label %[[IF_END:.*]]
+; CHECK:       [[IF_ELSE]]:
+; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[P1]] to i32
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    [[CONV1_PRE_PHI:%.*]] = phi i32 [ [[CONV]], %[[IF_ELSE]] ], [ [[DOTPRE]], %[[IF_THEN]] ]
+; CHECK-NEXT:    br i1 [[ARG]], label %[[IF_THEN3:.*]], label %[[IF_ELSE4:.*]]
+; CHECK:       [[IF_THEN3]]:
+; CHECK-NEXT:    [[DOTPRE1:%.*]] = sext i8 [[P2]] to i32
+; CHECK-NEXT:    br label %[[IF_END12:.*]]
+; CHECK:       [[IF_ELSE4]]:
+; CHECK-NEXT:    [[CONV7:%.*]] = sext i8 [[P2]] to i32
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[CONV1_PRE_PHI]], [[CONV7]]
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[IF_THEN10:.*]], label %[[IF_END12]]
+; CHECK:       [[IF_THEN10]]:
+; CHECK-NEXT:    br label %[[IF_END12]]
+; CHECK:       [[IF_END12]]:
+; CHECK-NEXT:    [[CONV13_PRE_PHI:%.*]] = phi i32 [ [[CONV7]], %[[IF_THEN10]] ], [ [[CONV7]], %[[IF_ELSE4]] ], [ [[DOTPRE1]], %[[IF_THEN3]] ]
+; CHECK-NEXT:    ret void
+;
 entry:
   br i1 %arg, label %if.else, label %if.then
 
diff --git a/llvm/test/Transforms/GVN/2016-08-30-MaskedScatterGather-inseltpoison.ll b/llvm/test/Transforms/GVN/2016-08-30-MaskedScatterGather-inseltpoison.ll
index c2b123b..aeb3de9 100644
--- a/llvm/test/Transforms/GVN/2016-08-30-MaskedScatterGather-inseltpoison.ll
+++ b/llvm/test/Transforms/GVN/2016-08-30-MaskedScatterGather-inseltpoison.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn -S | FileCheck %s
 
 declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> , <2 x ptr> , i32 , <2 x i1> )
@@ -5,14 +6,29 @@ declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x
 
 ; This test ensures that masked scatter and gather operations, which take vectors of pointers,
 ; do not have pointer aliasing ignored when being processed.
-; No scatter/gather calls should end up eliminated
-; CHECK: llvm.masked.gather
-; CHECK: llvm.masked.gather
-; CHECK: llvm.masked.scatter
-; CHECK: llvm.masked.gather
-; CHECK: llvm.masked.scatter
-; CHECK: llvm.masked.gather
+; No scatter/gather calls should end up eliminated.
+
 define spir_kernel void @test(<2 x ptr> %in1, <2 x ptr> %in2, ptr %out) {
+; CHECK-LABEL: define spir_kernel void @test(
+; CHECK-SAME: <2 x ptr> [[IN1:%.*]], <2 x ptr> [[IN2:%.*]], ptr [[OUT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP_0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP_1:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP_I:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP_0]], i32 0
+; CHECK-NEXT:    [[TMP:%.*]] = insertelement <2 x ptr> [[TMP_I]], ptr [[TMP_1]], i32 1
+; CHECK-NEXT:    [[IN1_V:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[IN1]], i32 1, <2 x i1> splat (i1 true), <2 x i32> undef)
+; CHECK-NEXT:    [[IN2_V:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[IN2]], i32 1, <2 x i1> splat (i1 true), <2 x i32> undef)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> [[IN1_V]], <2 x ptr> [[TMP]], i32 1, <2 x i1> splat (i1 true))
+; CHECK-NEXT:    [[TMP_V_0:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[TMP]], i32 1, <2 x i1> splat (i1 true), <2 x i32> undef)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> [[IN2_V]], <2 x ptr> [[TMP]], i32 1, <2 x i1> splat (i1 true))
+; CHECK-NEXT:    [[TMP_V_1:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[TMP]], i32 1, <2 x i1> splat (i1 true), <2 x i32> undef)
+; CHECK-NEXT:    [[TMP_V_1_0:%.*]] = extractelement <2 x i32> [[TMP_V_1]], i32 0
+; CHECK-NEXT:    [[TMP_V_1_1:%.*]] = extractelement <2 x i32> [[TMP_V_1]], i32 1
+; CHECK-NEXT:    store i32 [[TMP_V_1_0]], ptr [[OUT]], align 4
+; CHECK-NEXT:    [[OUT_1:%.*]] = getelementptr i32, ptr [[OUT]], i32 1
+; CHECK-NEXT:    store i32 [[TMP_V_1_1]], ptr [[OUT_1]], align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   ; Just some temporary storage
   %tmp.0 = alloca i32
diff --git a/llvm/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll b/llvm/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll
index e18f388..4c00060 100644
--- a/llvm/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll
+++ b/llvm/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn -S | FileCheck %s
 
 declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> , <2 x ptr> , i32 , <2 x i1> )
@@ -5,14 +6,29 @@ declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x
 
 ; This test ensures that masked scatter and gather operations, which take vectors of pointers,
 ; do not have pointer aliasing ignored when being processed.
-; No scatter/gather calls should end up eliminated
-; CHECK: llvm.masked.gather
-; CHECK: llvm.masked.gather
-; CHECK: llvm.masked.scatter
-; CHECK: llvm.masked.gather
-; CHECK: llvm.masked.scatter
-; CHECK: llvm.masked.gather
+; No scatter/gather calls should end up eliminated.
+
 define spir_kernel void @test(<2 x ptr> %in1, <2 x ptr> %in2, ptr %out) {
+; CHECK-LABEL: define spir_kernel void @test(
+; CHECK-SAME: <2 x ptr> [[IN1:%.*]], <2 x ptr> [[IN2:%.*]], ptr [[OUT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP_0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP_1:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP_I:%.*]] = insertelement <2 x ptr> undef, ptr [[TMP_0]], i32 0
+; CHECK-NEXT:    [[TMP:%.*]] = insertelement <2 x ptr> [[TMP_I]], ptr [[TMP_1]], i32 1
+; CHECK-NEXT:    [[IN1_V:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[IN1]], i32 1, <2 x i1> splat (i1 true), <2 x i32> undef)
+; CHECK-NEXT:    [[IN2_V:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[IN2]], i32 1, <2 x i1> splat (i1 true), <2 x i32> undef)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> [[IN1_V]], <2 x ptr> [[TMP]], i32 1, <2 x i1> splat (i1 true))
+; CHECK-NEXT:    [[TMP_V_0:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[TMP]], i32 1, <2 x i1> splat (i1 true), <2 x i32> undef)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> [[IN2_V]], <2 x ptr> [[TMP]], i32 1, <2 x i1> splat (i1 true))
+; CHECK-NEXT:    [[TMP_V_1:%.*]] = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> [[TMP]], i32 1, <2 x i1> splat (i1 true), <2 x i32> undef)
+; CHECK-NEXT:    [[TMP_V_1_0:%.*]] = extractelement <2 x i32> [[TMP_V_1]], i32 0
+; CHECK-NEXT:    [[TMP_V_1_1:%.*]] = extractelement <2 x i32> [[TMP_V_1]], i32 1
+; CHECK-NEXT:    store i32 [[TMP_V_1_0]], ptr [[OUT]], align 4
+; CHECK-NEXT:    [[OUT_1:%.*]] = getelementptr i32, ptr [[OUT]], i32 1
+; CHECK-NEXT:    store i32 [[TMP_V_1_1]], ptr [[OUT_1]], align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   ; Just some temporary storage
   %tmp.0 = alloca i32
diff --git a/llvm/test/Transforms/GVN/MemdepMiscompile.ll b/llvm/test/Transforms/GVN/MemdepMiscompile.ll
index cb9b011..7c8accb 100644
--- a/llvm/test/Transforms/GVN/MemdepMiscompile.ll
+++ b/llvm/test/Transforms/GVN/MemdepMiscompile.ll
@@ -1,4 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn -S | FileCheck %s
+
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-macosx10.7.0"
 
@@ -7,14 +9,38 @@ target triple = "x86_64-apple-macosx10.7.0"
 ; Make sure we do not replace load %shouldExit in while.cond.backedge
 ; with a phi node where the value from while.body is 0.
 define i32 @test() nounwind ssp {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[SHOULDEXIT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TASKSIDLE:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 0, ptr [[SHOULDEXIT]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[TASKSIDLE]], align 4
+; CHECK-NEXT:    call void @CTestInitialize(ptr [[TASKSIDLE]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    br i1 true, label %[[WHILE_BODY_LR_PH:.*]], label %[[ENTRY_WHILE_END_CRIT_EDGE:.*]]
+; CHECK:       [[ENTRY_WHILE_END_CRIT_EDGE]]:
+; CHECK-NEXT:    br label %[[WHILE_END:.*]]
+; CHECK:       [[WHILE_BODY_LR_PH]]:
+; CHECK-NEXT:    br label %[[WHILE_BODY:.*]]
+; CHECK:       [[WHILE_BODY]]:
+; CHECK-NEXT:    call void @RunInMode(i32 100) #[[ATTR1]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[TASKSIDLE]], align 4
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label %[[WHILE_COND_BACKEDGE:.*]], label %[[IF_THEN:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    store i32 0, ptr [[TASKSIDLE]], align 4
+; CHECK-NEXT:    call void @TimerCreate(ptr [[SHOULDEXIT]]) #[[ATTR1]]
+; CHECK-NEXT:    br label %[[WHILE_COND_BACKEDGE]]
+; CHECK:       [[WHILE_COND_BACKEDGE]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[SHOULDEXIT]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label %[[WHILE_BODY]], label %[[WHILE_COND_WHILE_END_CRIT_EDGE:.*]]
+; CHECK:       [[WHILE_COND_WHILE_END_CRIT_EDGE]]:
+; CHECK-NEXT:    br label %[[WHILE_END]]
+; CHECK:       [[WHILE_END]]:
+; CHECK-NEXT:    ret i32 0
+;
 entry:
-; CHECK: test()
-; CHECK: while.body:
-; CHECK: call void @RunInMode
-; CHECK: br i1 %tobool, label %while.cond.backedge, label %if.then
-; CHECK: while.cond.backedge:
-; CHECK: load i32, ptr %shouldExit
-; CHECK: br i1 %cmp, label %while.body
   %shouldExit = alloca i32, align 4
   %tasksIdle = alloca i32, align 4
   store i32 0, ptr %shouldExit, align 4
diff --git a/llvm/test/Transforms/GVN/basic-undef-test.ll b/llvm/test/Transforms/GVN/basic-undef-test.ll
index d12c3db..459ef25 100644
--- a/llvm/test/Transforms/GVN/basic-undef-test.ll
+++ b/llvm/test/Transforms/GVN/basic-undef-test.ll
@@ -1,15 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S < %s | FileCheck %s
-; ModuleID = 'test3.ll'
+
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
+; RLE over the second load.
 define i32 @main(ptr %foo)  {
+; CHECK-LABEL: define i32 @main(
+; CHECK-SAME: ptr [[FOO:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[FOO]], align 4
+; CHECK-NEXT:    store i32 5, ptr undef, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], [[TMP0]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
 entry:
-; CHECK: load i32, ptr %foo, align 4
   %0 = load i32, ptr %foo, align 4
   store i32 5, ptr undef, align 4
-; CHECK-NOT: load i32, ptr %foo, align 4
   %1 = load i32, ptr %foo, align 4
-; CHECK: add i32 %0, %0
   %2 = add i32 %0, %1
   ret i32 %2
 }
diff --git a/llvm/test/Transforms/GVN/bitcast-of-call.ll b/llvm/test/Transforms/GVN/bitcast-of-call.ll
index 6c4e8d2..3f40085 100644
--- a/llvm/test/Transforms/GVN/bitcast-of-call.ll
+++ b/llvm/test/Transforms/GVN/bitcast-of-call.ll
@@ -1,13 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn -S | FileCheck %s
+
 ; PR2213
 
 define ptr @f(ptr %x) {
+; CHECK-LABEL: define ptr @f(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP:%.*]] = call ptr @m(i32 12)
+; CHECK-NEXT:    ret ptr [[TMP]]
+;
 entry:
-        %tmp = call ptr @m( i32 12 )            ; <ptr> [#uses=2]
-        %tmp1 = bitcast ptr %tmp to ptr                ; <ptr> [#uses=0]
-        %tmp2 = bitcast ptr %tmp to ptr                ; <ptr> [#uses=0]
-; CHECK-NOT: %tmp2
-        ret ptr %tmp2
+  %tmp = call ptr @m(i32 12)            ; <ptr> [#uses=2]
+  %tmp1 = bitcast ptr %tmp to ptr                ; <ptr> [#uses=0]
+  %tmp2 = bitcast ptr %tmp to ptr                ; <ptr> [#uses=0]
+  ret ptr %tmp2
 }
 
 declare ptr @m(i32)
diff --git a/llvm/test/Transforms/GVN/br-identical.ll b/llvm/test/Transforms/GVN/br-identical.ll
index 9997e01..5266889 100644
--- a/llvm/test/Transforms/GVN/br-identical.ll
+++ b/llvm/test/Transforms/GVN/br-identical.ll
@@ -1,8 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S -o - %s | FileCheck %s
 
 ; If a branch has two identical successors, we cannot declare either dead.
-
 define void @widget(i1 %p) {
+; CHECK-LABEL: define void @widget(
+; CHECK-SAME: i1 [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BB2:.*]]
+; CHECK:       [[BB2]]:
+; CHECK-NEXT:    [[T1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[T2:%.*]], %[[BB7:.*]] ]
+; CHECK-NEXT:    [[T2]] = add i64 [[T1]], 1
+; CHECK-NEXT:    [[T3:%.*]] = icmp ult i64 0, [[T2]]
+; CHECK-NEXT:    br i1 [[T3]], label %[[BB3:.*]], label %[[BB4:.*]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    [[T4:%.*]] = call i64 @f()
+; CHECK-NEXT:    br label %[[BB4]]
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[FOO:%.*]] = phi i64 [ [[T4]], %[[BB3]] ], [ 0, %[[BB2]] ]
+; CHECK-NEXT:    br i1 [[P]], label %[[BB5:.*]], label %[[BB6:.*]]
+; CHECK:       [[BB5]]:
+; CHECK-NEXT:    br i1 true, label %[[BB7]], label %[[BB7]]
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    br i1 true, label %[[BB7]], label %[[BB7]]
+; CHECK:       [[BB7]]:
+; CHECK-NEXT:    br i1 [[P]], label %[[BB2]], label %[[BB8:.*]]
+; CHECK:       [[BB8]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %bb2
 
@@ -17,7 +41,6 @@ bb3:
   br label %bb4
 
 bb4:
-  ; CHECK-NOT: phi {{.*}} undef
   %foo = phi i64 [ %t4, %bb3 ], [ 0, %bb2 ]
   br i1 %p, label %bb5, label %bb6
 
diff --git a/llvm/test/Transforms/GVN/calls-nonlocal.ll b/llvm/test/Transforms/GVN/calls-nonlocal.ll
index e891545..4340d57 100644
--- a/llvm/test/Transforms/GVN/calls-nonlocal.ll
+++ b/llvm/test/Transforms/GVN/calls-nonlocal.ll
@@ -1,75 +1,78 @@
-; Two occurrences of strlen should be zapped.
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn -S | FileCheck %s
+
+; Two occurrences of strlen should be zapped.
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9"
 
 define i32 @test(i32 %g, ptr %P) nounwind  {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i32 [[G:%.*]], ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @strlen(ptr [[P]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 100
+; CHECK-NEXT:    [[TMP34:%.*]] = zext i1 [[TMP3]] to i8
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[BB:.*]], label %[[BB6:.*]]
+; CHECK:       [[BB]]:
+; CHECK-NEXT:    br label %[[BB27:.*]]
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[G]], 42
+; CHECK-NEXT:    br i1 false, label %[[BB14:.*]], label %[[BB16:.*]]
+; CHECK:       [[BB14]]:
+; CHECK-NEXT:    br label %[[BB27]]
+; CHECK:       [[BB16]]:
+; CHECK-NEXT:    [[TMP18:%.*]] = mul i32 [[TMP8]], 2
+; CHECK-NEXT:    br i1 false, label %[[BB24:.*]], label %[[BB26:.*]]
+; CHECK:       [[BB24]]:
+; CHECK-NEXT:    br label %[[BB27]]
+; CHECK:       [[BB26]]:
+; CHECK-NEXT:    br label %[[BB27]]
+; CHECK:       [[BB27]]:
+; CHECK-NEXT:    [[TMP_0:%.*]] = phi i32 [ 11, %[[BB26]] ], [ poison, %[[BB24]] ], [ poison, %[[BB14]] ], [ [[G]], %[[BB]] ]
+; CHECK-NEXT:    ret i32 [[TMP_0]]
+;
 entry:
-	%tmp2 = call i32 @strlen( ptr %P ) nounwind readonly 		; <i32> [#uses=1]
-	%tmp3 = icmp eq i32 %tmp2, 100		; <i1> [#uses=1]
-	%tmp34 = zext i1 %tmp3 to i8		; <i8> [#uses=1]
-	%toBool = icmp ne i8 %tmp34, 0		; <i1> [#uses=1]
-	br i1 %toBool, label %bb, label %bb6
+  %tmp2 = call i32 @strlen( ptr %P ) nounwind readonly 		; <i32> [#uses=1]
+  %tmp3 = icmp eq i32 %tmp2, 100		; <i1> [#uses=1]
+  %tmp34 = zext i1 %tmp3 to i8		; <i8> [#uses=1]
+  %toBool = icmp ne i8 %tmp34, 0		; <i1> [#uses=1]
+  br i1 %toBool, label %bb, label %bb6
 
 bb:		; preds = %entry
-	br label %bb27
+  br label %bb27
 
 bb6:		; preds = %entry
-	%tmp8 = add i32 %g, 42		; <i32> [#uses=2]
-	%tmp10 = call i32 @strlen( ptr %P ) nounwind readonly 		; <i32> [#uses=1]
-	%tmp11 = icmp eq i32 %tmp10, 100		; <i1> [#uses=1]
-	%tmp1112 = zext i1 %tmp11 to i8		; <i8> [#uses=1]
-	%toBool13 = icmp ne i8 %tmp1112, 0		; <i1> [#uses=1]
-	br i1 %toBool13, label %bb14, label %bb16
+  %tmp8 = add i32 %g, 42		; <i32> [#uses=2]
+  %tmp10 = call i32 @strlen( ptr %P ) nounwind readonly 		; <i32> [#uses=1]
+  %tmp11 = icmp eq i32 %tmp10, 100		; <i1> [#uses=1]
+  %tmp1112 = zext i1 %tmp11 to i8		; <i8> [#uses=1]
+  %toBool13 = icmp ne i8 %tmp1112, 0		; <i1> [#uses=1]
+  br i1 %toBool13, label %bb14, label %bb16
 
 bb14:		; preds = %bb6
-	br label %bb27
+  br label %bb27
 
 bb16:		; preds = %bb6
-	%tmp18 = mul i32 %tmp8, 2		; <i32> [#uses=1]
-	%tmp20 = call i32 @strlen( ptr %P ) nounwind readonly 		; <i32> [#uses=1]
-	%tmp21 = icmp eq i32 %tmp20, 100		; <i1> [#uses=1]
-	%tmp2122 = zext i1 %tmp21 to i8		; <i8> [#uses=1]
-	%toBool23 = icmp ne i8 %tmp2122, 0		; <i1> [#uses=1]
-	br i1 %toBool23, label %bb24, label %bb26
+  %tmp18 = mul i32 %tmp8, 2		; <i32> [#uses=1]
+  %tmp20 = call i32 @strlen( ptr %P ) nounwind readonly 		; <i32> [#uses=1]
+  %tmp21 = icmp eq i32 %tmp20, 100		; <i1> [#uses=1]
+  %tmp2122 = zext i1 %tmp21 to i8		; <i8> [#uses=1]
+  %toBool23 = icmp ne i8 %tmp2122, 0		; <i1> [#uses=1]
+  br i1 %toBool23, label %bb24, label %bb26
 
 bb24:		; preds = %bb16
-	br label %bb27
+  br label %bb27
 
 bb26:		; preds = %bb16
-	br label %bb27
+  br label %bb27
 
 bb27:		; preds = %bb26, %bb24, %bb14, %bb
-	%tmp.0 = phi i32 [ 11, %bb26 ], [ %tmp18, %bb24 ], [ %tmp8, %bb14 ], [ %g, %bb ]		; <i32> [#uses=1]
-	br label %return
+  %tmp.0 = phi i32 [ 11, %bb26 ], [ %tmp18, %bb24 ], [ %tmp8, %bb14 ], [ %g, %bb ]		; <i32> [#uses=1]
+  br label %return
 
 return:		; preds = %bb27
-	ret i32 %tmp.0
+  ret i32 %tmp.0
 }
 
-; CHECK: define i32 @test(i32 %g, ptr %P) #0 {
-; CHECK: entry:
-; CHECK:   %tmp2 = call i32 @strlen(ptr %P) #1
-; CHECK:   %tmp3 = icmp eq i32 %tmp2, 100
-; CHECK:   %tmp34 = zext i1 %tmp3 to i8
-; CHECK:   br i1 %tmp3, label %bb, label %bb6
-; CHECK: bb:
-; CHECK:   br label %bb27
-; CHECK: bb6:
-; CHECK:   %tmp8 = add i32 %g, 42
-; CHECK:   br i1 false, label %bb14, label %bb16
-; CHECK: bb14:
-; CHECK:   br label %bb27
-; CHECK: bb16:
-; CHECK:   %tmp18 = mul i32 %tmp8, 2
-; CHECK:   br i1 false, label %bb24, label %bb26
-; CHECK: bb24:
-; CHECK:   br label %bb27
-; CHECK: bb26:
-; CHECK:   br label %bb27
-; CHECK: bb27:
-; CHECK:   %tmp.0 = phi i32 [ 11, %bb26 ], [ poison, %bb24 ], [ poison, %bb14 ], [ %g, %bb ]
-; CHECK:   ret i32 %tmp.0
-; CHECK: }
 
-declare i32 @strlen(ptr) nounwind readonly 
+declare i32 @strlen(ptr) nounwind readonly
diff --git a/llvm/test/Transforms/GVN/calls-readonly.ll b/llvm/test/Transforms/GVN/calls-readonly.ll
index b4855e4..2fb5621 100644
--- a/llvm/test/Transforms/GVN/calls-readonly.ll
+++ b/llvm/test/Transforms/GVN/calls-readonly.ll
@@ -1,10 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn -S | FileCheck %s
+
 ; Should delete the second call to strlen even though the intervening strchr call exists.
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin7"
 
 define ptr @test(ptr %P, ptr %Q, i32 %x, i32 %y) nounwind readonly {
+; CHECK-LABEL: define ptr @test(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @strlen(ptr [[P]]), !prof [[PROF0:![0-9]+]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[BB:.*]], label %[[BB1:.*]]
+; CHECK:       [[BB]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = sdiv i32 [[X]], [[Y]]
+; CHECK-NEXT:    br label %[[BB1]]
+; CHECK:       [[BB1]]:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi i32 [ [[TMP2]], %[[BB]] ], [ [[X]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call ptr @strchr(ptr [[Q]], i32 97)
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[X_ADDR_0]], [[TMP0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP3]], i32 [[X_ADDR_0]]
+; CHECK-NEXT:    ret ptr [[TMP5]]
+;
 entry:
   %0 = tail call i32 @strlen(ptr %P), !prof !0    ; <i32> [#uses=2]
   %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1]
@@ -24,21 +42,6 @@ bb1:                                              ; preds = %bb, %entry
   ret ptr %6
 }
 
-; CHECK: define ptr @test(ptr %P, ptr %Q, i32 %x, i32 %y) #0 {
-; CHECK: entry:
-; CHECK-NEXT:   %0 = tail call i32 @strlen(ptr %P), !prof !0
-; CHECK-NEXT:   %1 = icmp eq i32 %0, 0
-; CHECK-NEXT:   br i1 %1, label %bb, label %bb1
-; CHECK: bb:
-; CHECK-NEXT:   %2 = sdiv i32 %x, %y
-; CHECK-NEXT:   br label %bb1
-; CHECK: bb1:
-; CHECK-NEXT:   %x_addr.0 = phi i32 [ %2, %bb ], [ %x, %entry ]
-; CHECK-NEXT:   %3 = tail call ptr @strchr(ptr %Q, i32 97)
-; CHECK-NEXT:   %4 = add i32 %x_addr.0, %0
-; CHECK-NEXT:   %5 = getelementptr i8, ptr %3, i32 %x_addr.0
-; CHECK-NEXT:   ret ptr %5
-; CHECK: }
 
 declare i32 @strlen(ptr) nounwind readonly
 
@@ -46,3 +49,6 @@ declare ptr @strchr(ptr, i32) nounwind readonly
 
 !0 = !{!"branch_weights", i32 95}
 !1 = !{!"branch_weights", i32 95}
+;.
+; CHECK: [[PROF0]] = !{!"branch_weights", i64 190}
+;.
diff --git a/llvm/test/Transforms/GVN/cond_br.ll b/llvm/test/Transforms/GVN/cond_br.ll
index fb84b62..10ee3a0 100644
--- a/llvm/test/Transforms/GVN/cond_br.ll
+++ b/llvm/test/Transforms/GVN/cond_br.ll
@@ -1,12 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S < %s | FileCheck %s
+
 @y = external global i32
 @z = external global i32
 
 ; Function Attrs: nounwind ssp uwtable
 define void @foo(i32 %x) {
-; CHECK: @foo(i32 %x)
-; CHECK: %.pre = load i32, ptr @y
-; CHECK: call void @bar(i32 %.pre)
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, ptr @y, align 4
+; CHECK-NEXT:    br i1 false, label %[[IF_THEN:.*]], label %[[ENTRY_IF_END_CRIT_EDGE:.*]]
+; CHECK:       [[ENTRY_IF_END_CRIT_EDGE]]:
+; CHECK-NEXT:    br label %[[IF_END:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[X]], 3
+; CHECK-NEXT:    store i32 [[ADD]], ptr @y, align 4
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    tail call void @bar(i32 [[DOTPRE]])
+; CHECK-NEXT:    ret void
+;
 
   %t = sub i32 %x, %x
   %.pre = load i32, ptr @y, align 4
@@ -28,9 +41,22 @@ if.end:                                           ; preds = %entry.if.end_crit_e
 }
 
 define void @foo2(i32 %x) {
-; CHECK: @foo2(i32 %x)
-; CHECK: %.pre = load i32, ptr @y
-; CHECK: tail call void @bar(i32 %.pre)
+; CHECK-LABEL: define void @foo2(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, ptr @y, align 4
+; CHECK-NEXT:    br i1 false, label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[X]], 3
+; CHECK-NEXT:    store i32 [[ADD]], ptr @y, align 4
+; CHECK-NEXT:    br label %[[IF_END:.*]]
+; CHECK:       [[IF_ELSE]]:
+; CHECK-NEXT:    store i32 1, ptr @z, align 4
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    tail call void @bar(i32 [[DOTPRE]])
+; CHECK-NEXT:    ret void
+;
 entry:
   %t = sub i32 %x, %x
   %.pre = load i32, ptr @y, align 4
diff --git a/llvm/test/Transforms/GVN/cond_br2.ll b/llvm/test/Transforms/GVN/cond_br2.ll
index ff80328..6ceec95 100644
--- a/llvm/test/Transforms/GVN/cond_br2.ll
+++ b/llvm/test/Transforms/GVN/cond_br2.ll
@@ -1,4 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S < %s | FileCheck %s
+
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 %"class.llvm::SmallVector" = type { %"class.llvm::SmallVectorImpl", [1 x %"union.llvm::SmallVectorBase::U"] }
@@ -10,10 +12,77 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 ; Function Attrs: ssp uwtable
 define void @_Z4testv() #0 personality ptr @__gxx_personality_v0 {
-; CHECK: @_Z4testv()
-; CHECK: invoke.cont:
-; CHECK: br i1 true, label %new.notnull.i11, label %if.end.i14
-; CHECK: Retry.i10:
+; CHECK-LABEL: define void @_Z4testv(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] personality ptr @__gxx_personality_v0 {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[SV:%.*]] = alloca %"class.llvm::SmallVector", align 16
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr [[SV]]) #[[ATTR4:[0-9]+]]
+; CHECK-NEXT:    [[FIRSTEL_I_I_I_I_I_I:%.*]] = getelementptr inbounds %"class.llvm::SmallVector", ptr [[SV]], i64 0, i32 0, i32 0, i32 0, i32 0, i32 3
+; CHECK-NEXT:    store ptr [[FIRSTEL_I_I_I_I_I_I]], ptr [[SV]], align 16, !tbaa [[ANYPTR_TBAA0:![0-9]+]]
+; CHECK-NEXT:    [[ENDX_I_I_I_I_I_I:%.*]] = getelementptr inbounds %"class.llvm::SmallVector", ptr [[SV]], i64 0, i32 0, i32 0, i32 0, i32 0, i32 1
+; CHECK-NEXT:    store ptr [[FIRSTEL_I_I_I_I_I_I]], ptr [[ENDX_I_I_I_I_I_I]], align 8, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT:    [[CAPACITYX_I_I_I_I_I_I:%.*]] = getelementptr inbounds %"class.llvm::SmallVector", ptr [[SV]], i64 0, i32 0, i32 0, i32 0, i32 0, i32 2
+; CHECK-NEXT:    [[ADD_PTR_I_I_I_I2_I_I:%.*]] = getelementptr inbounds %"union.llvm::SmallVectorBase::U", ptr [[FIRSTEL_I_I_I_I_I_I]], i64 2
+; CHECK-NEXT:    store ptr [[ADD_PTR_I_I_I_I2_I_I]], ptr [[CAPACITYX_I_I_I_I_I_I]], align 16, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT:    br i1 true, label %[[RETRY_I:.*]], label %[[IF_END_I:.*]]
+; CHECK:       [[RETRY_I]]:
+; CHECK-NEXT:    br i1 false, label %[[RETRY_I_INVOKE_CONT_CRIT_EDGE:.*]], label %[[NEW_NOTNULL_I:.*]]
+; CHECK:       [[RETRY_I_INVOKE_CONT_CRIT_EDGE]]:
+; CHECK-NEXT:    br label %[[INVOKE_CONT:.*]]
+; CHECK:       [[NEW_NOTNULL_I]]:
+; CHECK-NEXT:    store i32 1, ptr [[FIRSTEL_I_I_I_I_I_I]], align 4, !tbaa [[INT_TBAA4:![0-9]+]]
+; CHECK-NEXT:    br label %[[INVOKE_CONT]]
+; CHECK:       [[IF_END_I]]:
+; CHECK-NEXT:    invoke void @_ZN4llvm15SmallVectorBase8grow_podEmm(ptr [[SV]], i64 0, i64 4)
+; CHECK-NEXT:            to [[DOTNOEXC:label %.*]] unwind label %[[LPAD:.*]]
+; CHECK:       [[_NOEXC:.*:]]
+; CHECK-NEXT:    [[DOTPRE_I:%.*]] = load ptr, ptr [[ENDX_I_I_I_I_I_I]], align 8, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT:    br label %[[RETRY_I]]
+; CHECK:       [[INVOKE_CONT]]:
+; CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i8, ptr [[FIRSTEL_I_I_I_I_I_I]], i64 4
+; CHECK-NEXT:    store ptr [[ADD_PTR_I]], ptr [[ENDX_I_I_I_I_I_I]], align 8, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT:    br i1 true, label %[[NEW_NOTNULL_I11:.*]], label %[[IF_END_I14:.*]]
+; CHECK:       [[RETRY_I10:.*]]:
+; CHECK-NEXT:    [[DOTPRE_I13:%.*]] = load ptr, ptr [[ENDX_I_I_I_I_I_I]], align 8, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT:    [[NEW_ISNULL_I9:%.*]] = icmp eq ptr [[DOTPRE_I13]], null
+; CHECK-NEXT:    br i1 [[NEW_ISNULL_I9]], label %[[RETRY_I10_INVOKE_CONT2_CRIT_EDGE:.*]], label %[[RETRY_I10_NEW_NOTNULL_I11_CRIT_EDGE:.*]]
+; CHECK:       [[RETRY_I10_NEW_NOTNULL_I11_CRIT_EDGE]]:
+; CHECK-NEXT:    br label %[[NEW_NOTNULL_I11]]
+; CHECK:       [[RETRY_I10_INVOKE_CONT2_CRIT_EDGE]]:
+; CHECK-NEXT:    br label %[[INVOKE_CONT2:.*]]
+; CHECK:       [[NEW_NOTNULL_I11]]:
+; CHECK-NEXT:    store i32 2, ptr [[ADD_PTR_I]], align 4, !tbaa [[INT_TBAA4]]
+; CHECK-NEXT:    br label %[[INVOKE_CONT2]]
+; CHECK:       [[IF_END_I14]]:
+; CHECK-NEXT:    invoke void @_ZN4llvm15SmallVectorBase8grow_podEmm(ptr [[SV]], i64 0, i64 4)
+; CHECK-NEXT:            to label %[[RETRY_I10]] unwind label %[[LPAD]]
+; CHECK:       [[INVOKE_CONT2]]:
+; CHECK-NEXT:    [[ADD_PTR_I12:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_I]], i64 4
+; CHECK-NEXT:    store ptr [[ADD_PTR_I12]], ptr [[ENDX_I_I_I_I_I_I]], align 8, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT:    invoke void @_Z1gRN4llvm11SmallVectorIiLj8EEE(ptr [[SV]])
+; CHECK-NEXT:            to label %[[INVOKE_CONT3:.*]] unwind label %[[LPAD]]
+; CHECK:       [[INVOKE_CONT3]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[SV]], align 16, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT:    [[CMP_I_I_I_I19:%.*]] = icmp eq ptr [[TMP0]], [[FIRSTEL_I_I_I_I_I_I]]
+; CHECK-NEXT:    br i1 [[CMP_I_I_I_I19]], label %[[_ZN4LLVM11SMALLVECTORIILJ8EED1EV_EXIT21:.*]], label %[[IF_THEN_I_I_I20:.*]]
+; CHECK:       [[IF_THEN_I_I_I20]]:
+; CHECK-NEXT:    call void @free(ptr [[TMP0]]) #[[ATTR4]]
+; CHECK-NEXT:    br label %[[_ZN4LLVM11SMALLVECTORIILJ8EED1EV_EXIT21]]
+; CHECK:       [[_ZN4LLVM11SMALLVECTORIILJ8EED1EV_EXIT21]]:
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr [[SV]]) #[[ATTR4]]
+; CHECK-NEXT:    ret void
+; CHECK:       [[LPAD]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            cleanup
+; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[SV]], align 16, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT:    [[CMP_I_I_I_I:%.*]] = icmp eq ptr [[TMP2]], [[FIRSTEL_I_I_I_I_I_I]]
+; CHECK-NEXT:    br i1 [[CMP_I_I_I_I]], label %[[EH_RESUME:.*]], label %[[IF_THEN_I_I_I:.*]]
+; CHECK:       [[IF_THEN_I_I_I]]:
+; CHECK-NEXT:    call void @free(ptr [[TMP2]]) #[[ATTR4]]
+; CHECK-NEXT:    br label %[[EH_RESUME]]
+; CHECK:       [[EH_RESUME]]:
+; CHECK-NEXT:    resume { ptr, i32 } [[TMP1]]
+;
 
 entry:
   %sv = alloca %"class.llvm::SmallVector", align 16
@@ -42,7 +111,7 @@ new.notnull.i:                                    ; preds = %Retry.i
 
 if.end.i:                                         ; preds = %entry
   invoke void @_ZN4llvm15SmallVectorBase8grow_podEmm(ptr %sv, i64 0, i64 4)
-          to label %.noexc unwind label %lpad
+  to label %.noexc unwind label %lpad
 
 .noexc:                                           ; preds = %if.end.i
   %.pre.i = load ptr, ptr %EndX.i, align 8, !tbaa !4
@@ -67,14 +136,14 @@ new.notnull.i11:                                  ; preds = %invoke.cont, %Retry
 
 if.end.i14:                                       ; preds = %invoke.cont
   invoke void @_ZN4llvm15SmallVectorBase8grow_podEmm(ptr %sv, i64 0, i64 4)
-          to label %Retry.i10 unwind label %lpad
+  to label %Retry.i10 unwind label %lpad
 
 invoke.cont2:                                     ; preds = %new.notnull.i11, %Retry.i10
   %4 = phi ptr [ null, %Retry.i10 ], [ %3, %new.notnull.i11 ]
   %add.ptr.i12 = getelementptr inbounds i8, ptr %4, i64 4
   store ptr %add.ptr.i12, ptr %EndX.i, align 8, !tbaa !4
   invoke void @_Z1gRN4llvm11SmallVectorIiLj8EEE(ptr %sv)
-          to label %invoke.cont3 unwind label %lpad
+  to label %invoke.cont3 unwind label %lpad
 
 invoke.cont3:                                     ; preds = %invoke.cont2
   %5 = load ptr, ptr %sv, align 16, !tbaa !4
@@ -91,7 +160,7 @@ _ZN4llvm11SmallVectorIiLj8EED1Ev.exit21:          ; preds = %invoke.cont3, %if.t
 
 lpad:                                             ; preds = %if.end.i14, %if.end.i, %invoke.cont2
   %6 = landingpad { ptr, i32 }
-          cleanup
+  cleanup
   %7 = load ptr, ptr %sv, align 16, !tbaa !4
   %cmp.i.i.i.i = icmp eq ptr %7, %FirstEl.i.i.i.i.i.i
   br i1 %cmp.i.i.i.i, label %eh.resume, label %if.then.i.i.i
@@ -130,3 +199,11 @@ attributes #3 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "n
 !3 = !{!"int", !1}
 !4 = !{!0, !0, i64 0}
 !5 = !{!3, !3, i64 0}
+;.
+; CHECK: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"any pointer", [[META2:![0-9]+]]}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]]}
+; CHECK: [[META3]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[INT_TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; CHECK: [[META5]] = !{!"int", [[META2]]}
+;.
diff --git a/llvm/test/Transforms/GVN/crash-no-aa.ll b/llvm/test/Transforms/GVN/crash-no-aa.ll
index 10e6374..f396c10 100644
--- a/llvm/test/Transforms/GVN/crash-no-aa.ll
+++ b/llvm/test/Transforms/GVN/crash-no-aa.ll
@@ -1,10 +1,19 @@
-; RUN: opt -disable-basic-aa -passes=gvn -S < %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -disable-basic-aa -passes=gvn -S -o - < %s | FileCheck %s
+
+; PR5744
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-freebsd8.0"
 
-; PR5744
 define i32 @test1(ptr %P) {
+; CHECK-LABEL: define i32 @test1(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    store i16 42, ptr [[P]], align 2
+; CHECK-NEXT:    [[P3:%.*]] = getelementptr { i16, i32 }, ptr [[P]], i32 0, i32 1
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[P3]], align 4
+; CHECK-NEXT:    ret i32 [[V]]
+;
   %P2 = getelementptr {i16, i32}, ptr %P, i32 0, i32 0
   store i16 42, ptr %P2
 
diff --git a/llvm/test/Transforms/GVN/critical-edge-split-failure.ll b/llvm/test/Transforms/GVN/critical-edge-split-failure.ll
index 8eac5fe..40ebe14 100644
--- a/llvm/test/Transforms/GVN/critical-edge-split-failure.ll
+++ b/llvm/test/Transforms/GVN/critical-edge-split-failure.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S -o - %s | FileCheck %s
 
 %struct.sk_buff = type opaque
@@ -10,6 +11,31 @@
 declare void @llvm.assume(i1 noundef)
 
 define dso_local void @l2tp_recv_dequeue() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @l2tp_recv_dequeue() local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @l2tp_recv_dequeue_session, align 4
+; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[CONV]] to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @l2tp_recv_dequeue_session_2, align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    br label %[[FOR_COND:.*]]
+; CHECK:       [[FOR_COND]]:
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi ptr [ [[TMP1]], %[[ENTRY]] ], [ null, %[[IF_END:.*]] ]
+; CHECK-NEXT:    store ptr [[STOREMERGE]], ptr @l2tp_recv_dequeue_skb, align 8
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label %[[IF_END]], label %[[IF_THEN:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[STOREMERGE]], align 4
+; CHECK-NEXT:    store i32 [[TMP3]], ptr @l2tp_recv_dequeue_session_0, align 4
+; CHECK-NEXT:    callbr void asm sideeffect "", "!i,~{dirflag},~{fpsr},~{flags}"()
+; CHECK-NEXT:            to label %[[ASM_FALLTHROUGH_I:.*]] [label %if.end]
+; CHECK:       [[ASM_FALLTHROUGH_I]]:
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[STOREMERGE]], align 4
+; CHECK-NEXT:    [[TOBOOL2_NOT:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TOBOOL2_NOT]])
+; CHECK-NEXT:    br label %[[FOR_COND]]
+;
 entry:
   %0 = load i32, ptr @l2tp_recv_dequeue_session, align 4
   %conv = sext i32 %0 to i64
@@ -29,10 +55,8 @@ if.then:                                          ; preds = %for.cond
 ; Splitting the critical edge from if.then to if.end will fail, but should not
 ; cause an infinite loop in GVN. If we can one day split edges of callbr
 ; indirect targets, great!
-; CHECK: callbr void asm sideeffect "", "!i,~{dirflag},~{fpsr},~{flags}"()
-; CHECK-NEXT: to label %asm.fallthrough.i [label %if.end]
   callbr void asm sideeffect "", "!i,~{dirflag},~{fpsr},~{flags}"()
-          to label %asm.fallthrough.i [label %if.end]
+  to label %asm.fallthrough.i [label %if.end]
 
 asm.fallthrough.i:                                ; preds = %if.then
   br label %if.end
@@ -43,4 +67,3 @@ if.end:                                           ; preds = %asm.fallthrough.i,
   tail call void @llvm.assume(i1 %tobool2.not)
   br label %for.cond
 }
-
diff --git a/llvm/test/Transforms/GVN/dbg-redundant-load.ll b/llvm/test/Transforms/GVN/dbg-redundant-load.ll
index 1ba4e8b..094467e 100644
--- a/llvm/test/Transforms/GVN/dbg-redundant-load.ll
+++ b/llvm/test/Transforms/GVN/dbg-redundant-load.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S < %s | FileCheck %s
 
 ; Check that the redundant load from %if.then is removed.
@@ -6,15 +7,21 @@
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-; CHECK: @test_redundant_load(
-; CHECK-LABEL: entry:
-; CHECK-NEXT: load i32, ptr %Y, align 4, !dbg ![[LOC:[0-9]+]]
-; CHECK-LABEL: if.then:
-; CHECK-NOT: load
-; CHECK-LABEL: if.end:
-; CHECK: ![[LOC]] = !DILocation(line: 3, scope: !{{.*}})
-
 define i32 @test_redundant_load(i32 %X, ptr %Y) !dbg !6 {
+; CHECK-LABEL: define i32 @test_redundant_load(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[Y:%.*]]) !dbg [[DBG6:![0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Y]], align 4, !dbg [[DBG8:![0-9]+]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[X]], -1, !dbg [[DBG9:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]], !dbg [[DBG9]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP0]], !dbg [[DBG10:![0-9]+]]
+; CHECK-NEXT:    call void @foo(), !dbg [[DBG11:![0-9]+]]
+; CHECK-NEXT:    br label %[[IF_END]], !dbg [[DBG12:![0-9]+]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    [[RESULT_0:%.*]] = phi i32 [ [[ADD]], %[[IF_THEN]] ], [ [[TMP0]], %[[ENTRY]] ]
+; CHECK-NEXT:    ret i32 [[RESULT_0]], !dbg [[DBG13:![0-9]+]]
+;
 entry:
   %0 = load i32, ptr %Y, align 4, !dbg !8
   %cmp = icmp sgt i32 %X, -1, !dbg !9
@@ -50,3 +57,16 @@ declare void @foo()
 !11 = !DILocation(line: 7, scope: !6)
 !12 = !DILocation(line: 8, scope: !6)
 !13 = !DILocation(line: 10, scope: !6)
+;.
+; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: [[META1:![0-9]+]], isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: [[META2:![0-9]+]])
+; CHECK: [[META1]] = !DIFile(filename: "test.cpp", directory: "")
+; CHECK: [[META2]] = !{}
+; CHECK: [[DBG6]] = distinct !DISubprogram(name: "test_redundant_load", scope: [[META1]], file: [[META1]], line: 2, type: [[META7:![0-9]+]], scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META0]], retainedNodes: [[META2]])
+; CHECK: [[META7]] = !DISubroutineType(types: [[META2]])
+; CHECK: [[DBG8]] = !DILocation(line: 3, scope: [[DBG6]])
+; CHECK: [[DBG9]] = !DILocation(line: 5, scope: [[DBG6]])
+; CHECK: [[DBG10]] = !DILocation(line: 6, scope: [[DBG6]])
+; CHECK: [[DBG11]] = !DILocation(line: 7, scope: [[DBG6]])
+; CHECK: [[DBG12]] = !DILocation(line: 8, scope: [[DBG6]])
+; CHECK: [[DBG13]] = !DILocation(line: 10, scope: [[DBG6]])
+;.
diff --git a/llvm/test/Transforms/GVN/fake-use-constprop.ll b/llvm/test/Transforms/GVN/fake-use-constprop.ll
index 0e7ca10..85b7dc3 100644
--- a/llvm/test/Transforms/GVN/fake-use-constprop.ll
+++ b/llvm/test/Transforms/GVN/fake-use-constprop.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S < %s | FileCheck %s
 ;
 ; The Global Value Numbering pass (GVN) propagates boolean values
@@ -33,11 +34,20 @@
 
 ;; GVN should propagate a constant value through to a regular call, but not to
 ;; a fake use, which should continue to track the original value.
-; CHECK: %[[CONV_VAR:[a-zA-Z0-9]+]] = fptosi
-; CHECK: call {{.+}} @bees(i8 0)
-; CHECK: call {{.+}} @llvm.fake.use(i8 %[[CONV_VAR]])
 
 define i32 @foo(float %f) optdebug {
+; CHECK-LABEL: define i32 @foo(
+; CHECK-SAME: float [[F:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[F]] to i8
+; CHECK-NEXT:    [[TOBOOL3:%.*]] = icmp eq i8 [[CONV]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL3]], label %[[IF_END:.*]], label %[[LAB:.*]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    tail call void (...) @bees(i8 0)
+; CHECK-NEXT:    tail call void (...) @llvm.fake.use(i8 [[CONV]])
+; CHECK-NEXT:    br label %[[LAB]]
+; CHECK:       [[LAB]]:
+; CHECK-NEXT:    ret i32 1
+;
   %conv = fptosi float %f to i8
   %tobool3 = icmp eq i8 %conv, 0
   br i1 %tobool3, label %if.end, label %lab
diff --git a/llvm/test/Transforms/GVN/flags.ll b/llvm/test/Transforms/GVN/flags.ll
index 2e5aeed..3777e14 100644
--- a/llvm/test/Transforms/GVN/flags.ll
+++ b/llvm/test/Transforms/GVN/flags.ll
@@ -1,8 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S < %s | FileCheck %s
 
 declare void @use(i1)
 
 define void @test1(float %x, float %y) {
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp oeq float [[Y]], [[X]]
+; CHECK-NEXT:    call void @use(i1 [[CMP1]])
+; CHECK-NEXT:    call void @use(i1 [[CMP1]])
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp1 = fcmp nnan oeq float %y, %x
   %cmp2 = fcmp oeq float %x, %y
@@ -10,9 +19,3 @@ entry:
   call void @use(i1 %cmp2)
   ret void
 }
-
-; CHECK-LABEL: define void @test1(
-; CHECK: %[[cmp:.*]] = fcmp oeq float %y, %x
-; CHECK-NEXT: call void @use(i1 %[[cmp]])
-; CHECK-NEXT: call void @use(i1 %[[cmp]])
-; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/GVN/fold-const-expr.ll b/llvm/test/Transforms/GVN/fold-const-expr.ll
index 9e1129e..edbfcda 100644
--- a/llvm/test/Transforms/GVN/fold-const-expr.ll
+++ b/llvm/test/Transforms/GVN/fold-const-expr.ll
@@ -1,12 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=gvn -S < %s | FileCheck %s
+
 ; GVN failed to do constant expression folding and expanded
 ; them unfolded in many places, producing exponentially large const
 ; expressions. As a result, the compilation never fisished.
 ; This test checks that we are folding constant expression
 ; PR 28418
-; RUN: opt -passes=gvn -S < %s | FileCheck %s
 
 %2 = type { i32, i32, i32, i32, i32 }
 define i32 @_Z16vector3util_mainv(i32 %x, i32 %y)  {
+; CHECK-LABEL: define i32 @_Z16vector3util_mainv(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [[TMP0:%.*]], align 4
+; CHECK-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [[TMP0]], ptr [[TMP1]], i64 0, i32 1
+; CHECK-NEXT:    store <4 x i32> <i32 234567891, i32 345678912, i32 456789123, i32 0>, ptr [[TMP114]], align 4
+; CHECK-NEXT:    store i32 310393545, ptr [[TMP114]], align 4
+; CHECK-NEXT:    store i32 -383584258, ptr [[TMP114]], align 4
+; CHECK-NEXT:    store i32 -57163022, ptr [[TMP114]], align 4
+; CHECK-NEXT:    ret i32 0
+;
   %tmp1 = alloca %2, align 4
   %tmp114 = getelementptr inbounds %2, ptr %tmp1, i64 0, i32 1
   store <4 x i32> <i32 234567891, i32 345678912, i32 456789123, i32 0>, ptr %tmp114, align 4
@@ -37,7 +49,6 @@ define i32 @_Z16vector3util_mainv(i32 %x, i32 %y)  {
   %tmp1739 = shl i32 %tmp1738, 22
   %tmp1740 = xor i32 %tmp1739, %tmp1738
   store i32 %tmp1740, ptr %tmp1683, align 4
-; CHECK: store i32 310393545, ptr %tmp114, align 4
   %tmp1756 = getelementptr inbounds %2, ptr %tmp1, i64 0, i32 1
   %tmp1761 = load i32, ptr %tmp1756, align 4
   %tmp1766 = shl i32 %tmp1761, 5
@@ -65,7 +76,6 @@ define i32 @_Z16vector3util_mainv(i32 %x, i32 %y)  {
   %tmp1812 = shl i32 %tmp1811, 22
   %tmp1813 = xor i32 %tmp1812, %tmp1811
   store i32 %tmp1813, ptr %tmp1756, align 4
-; CHECK: store i32 -383584258, ptr %tmp114, align 4
   %tmp2645 = getelementptr inbounds %2, ptr %tmp1, i64 0, i32 1
   %tmp2650 = load i32, ptr %tmp2645, align 4
   %tmp2655 = shl i32 %tmp2650, 5
@@ -93,6 +103,5 @@ define i32 @_Z16vector3util_mainv(i32 %x, i32 %y)  {
   %tmp2701 = shl i32 %tmp2700, 22
   %tmp2702 = xor i32 %tmp2701, %tmp2700
   store i32 %tmp2702, ptr %tmp2645, align 4
-; CHECK: store i32 -57163022, ptr %tmp114, align 4
   ret i32 0
 }
diff --git a/llvm/test/Transforms/GVN/fpmath.ll b/llvm/test/Transforms/GVN/fpmath.ll
index 970dd89..2069faa 100644
--- a/llvm/test/Transforms/GVN/fpmath.ll
+++ b/llvm/test/Transforms/GVN/fpmath.ll
@@ -1,10 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S < %s | FileCheck %s
 
 define double @test1(double %x, double %y) {
-; CHECK: @test1(double %x, double %y)
-; CHECK: %add1 = fadd double %x, %y
-; CHECK-NOT: fpmath
-; CHECK: %foo = fadd double %add1, %add1
+; CHECK-LABEL: define double @test1(
+; CHECK-SAME: double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[X]], [[Y]]
+; CHECK-NEXT:    [[FOO:%.*]] = fadd double [[ADD1]], [[ADD1]]
+; CHECK-NEXT:    ret double [[FOO]]
+;
   %add1 = fadd double %x, %y, !fpmath !0
   %add2 = fadd double %x, %y
   %foo = fadd double %add1, %add2
@@ -12,9 +15,12 @@ define double @test1(double %x, double %y) {
 }
 
 define double @test2(double %x, double %y) {
-; CHECK: @test2(double %x, double %y)
-; CHECK: %add1 = fadd double %x, %y, !fpmath !0
-; CHECK: %foo = fadd double %add1, %add1
+; CHECK-LABEL: define double @test2(
+; CHECK-SAME: double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[X]], [[Y]], !fpmath [[META0:![0-9]+]]
+; CHECK-NEXT:    [[FOO:%.*]] = fadd double [[ADD1]], [[ADD1]]
+; CHECK-NEXT:    ret double [[FOO]]
+;
   %add1 = fadd double %x, %y, !fpmath !0
   %add2 = fadd double %x, %y, !fpmath !0
   %foo = fadd double %add1, %add2
@@ -22,9 +28,12 @@ define double @test2(double %x, double %y) {
 }
 
 define double @test3(double %x, double %y) {
-; CHECK: @test3(double %x, double %y)
-; CHECK: %add1 = fadd double %x, %y, !fpmath !1
-; CHECK: %foo = fadd double %add1, %add1
+; CHECK-LABEL: define double @test3(
+; CHECK-SAME: double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[X]], [[Y]], !fpmath [[META1:![0-9]+]]
+; CHECK-NEXT:    [[FOO:%.*]] = fadd double [[ADD1]], [[ADD1]]
+; CHECK-NEXT:    ret double [[FOO]]
+;
   %add1 = fadd double %x, %y, !fpmath !1
   %add2 = fadd double %x, %y, !fpmath !0
   %foo = fadd double %add1, %add2
@@ -32,9 +41,12 @@ define double @test3(double %x, double %y) {
 }
 
 define double @test4(double %x, double %y) {
-; CHECK: @test4(double %x, double %y)
-; CHECK: %add1 = fadd double %x, %y, !fpmath !1
-; CHECK: %foo = fadd double %add1, %add1
+; CHECK-LABEL: define double @test4(
+; CHECK-SAME: double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[X]], [[Y]], !fpmath [[META1]]
+; CHECK-NEXT:    [[FOO:%.*]] = fadd double [[ADD1]], [[ADD1]]
+; CHECK-NEXT:    ret double [[FOO]]
+;
   %add1 = fadd double %x, %y, !fpmath !0
   %add2 = fadd double %x, %y, !fpmath !1
   %foo = fadd double %add1, %add2
@@ -42,9 +54,12 @@ define double @test4(double %x, double %y) {
 }
 
 define double @test5(double %x, double %y) {
-; CHECK: @test5(double %x, double %y)
-; CHECK: %neg1 = fneg double %x, !fpmath !1
-; CHECK: %foo = fadd double %neg1, %neg1
+; CHECK-LABEL: define double @test5(
+; CHECK-SAME: double [[X:%.*]], double [[Y:%.*]]) {
+; CHECK-NEXT:    [[NEG1:%.*]] = fneg double [[X]], !fpmath [[META1]]
+; CHECK-NEXT:    [[FOO:%.*]] = fadd double [[NEG1]], [[NEG1]]
+; CHECK-NEXT:    ret double [[FOO]]
+;
   %neg1 = fneg double %x, !fpmath !0
   %neg2 = fneg double %x, !fpmath !1
   %foo = fadd double %neg1, %neg2
@@ -53,3 +68,7 @@ define double @test5(double %x, double %y) {
 
 !0 = !{ float 5.0 }
 !1 = !{ float 2.5 }
+;.
+; CHECK: [[META0]] = !{float 5.000000e+00}
+; CHECK: [[META1]] = !{float 2.500000e+00}
+;.
diff --git a/llvm/test/Transforms/GVN/funclet.ll b/llvm/test/Transforms/GVN/funclet.ll
index 8ef4c96..34ed78f 100644
--- a/llvm/test/Transforms/GVN/funclet.ll
+++ b/llvm/test/Transforms/GVN/funclet.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S < %s | FileCheck %s
 target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "i686-pc-windows-msvc"
@@ -8,13 +9,35 @@ target triple = "i686-pc-windows-msvc"
 @"_TI1?AUA@@" = external constant %eh.ThrowInfo
 
 define i8 @f() personality ptr @__CxxFrameHandler3 {
+; CHECK-LABEL: define i8 @f() personality ptr @__CxxFrameHandler3 {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[B:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    [[C:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[B]], align 1
+; CHECK-NEXT:    store i8 13, ptr [[C]], align 1
+; CHECK-NEXT:    invoke void @_CxxThrowException(ptr [[B]], ptr nonnull @"_TI1?AUA@@")
+; CHECK-NEXT:            to label %[[UNREACHABLE:.*]] unwind label %[[CATCH_DISPATCH:.*]]
+; CHECK:       [[CATCH_DISPATCH]]:
+; CHECK-NEXT:    [[CS1:%.*]] = catchswitch within none [label %catch] unwind to caller
+; CHECK:       [[CATCH:.*:]]
+; CHECK-NEXT:    [[CATCHPAD:%.*]] = catchpad within [[CS1]] [ptr null, i32 64, ptr null]
+; CHECK-NEXT:    store i8 5, ptr [[B]], align 1
+; CHECK-NEXT:    catchret from [[CATCHPAD]] to label %[[TRY_CONT:.*]]
+; CHECK:       [[TRY_CONT]]:
+; CHECK-NEXT:    [[LOAD_B:%.*]] = load i8, ptr [[B]], align 1
+; CHECK-NEXT:    [[LOAD_C:%.*]] = load i8, ptr [[C]], align 1
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LOAD_B]], [[LOAD_C]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+; CHECK:       [[UNREACHABLE]]:
+; CHECK-NEXT:    unreachable
+;
 entry:
   %b = alloca i8
   %c = alloca i8
   store i8 42, ptr %b
   store i8 13, ptr %c
   invoke void @_CxxThrowException(ptr %b, ptr nonnull @"_TI1?AUA@@")
-          to label %unreachable unwind label %catch.dispatch
+  to label %unreachable unwind label %catch.dispatch
 
 catch.dispatch:                                   ; preds = %entry
   %cs1 = catchswitch within none [label %catch] unwind to caller
@@ -33,11 +56,6 @@ try.cont:                                         ; preds = %catch
 unreachable:                                      ; preds = %entry
   unreachable
 }
-; CHECK-LABEL: define i8 @f(
-; CHECK:       %[[load_b:.*]] = load i8, ptr %b
-; CHECK-NEXT:  %[[load_c:.*]] = load i8, ptr %c
-; CHECK-NEXT:  %[[add:.*]] = add i8 %[[load_b]], %[[load_c]]
-; CHECK-NEXT:  ret i8 %[[add]]
 
 declare i32 @__CxxFrameHandler3(...)
 
diff --git a/llvm/test/Transforms/GVN/int_sideeffect.ll b/llvm/test/Transforms/GVN/int_sideeffect.ll
index 513533a..8754cc0 100644
--- a/llvm/test/Transforms/GVN/int_sideeffect.ll
+++ b/llvm/test/Transforms/GVN/int_sideeffect.ll
@@ -1,38 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -S < %s -passes=gvn | FileCheck %s
 
 declare void @llvm.sideeffect()
 
 ; Store-to-load forwarding across a @llvm.sideeffect.
-
-; CHECK-LABEL: s2l
-; CHECK-NOT: load
 define float @s2l(ptr %p) {
-    store float 0.0, ptr %p
-    call void @llvm.sideeffect()
-    %t = load float, ptr %p
-    ret float %t
+; CHECK-LABEL: define float @s2l(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    store float 0.000000e+00, ptr [[P]], align 4
+; CHECK-NEXT:    call void @llvm.sideeffect()
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  store float 0.0, ptr %p
+  call void @llvm.sideeffect()
+  %t = load float, ptr %p
+  ret float %t
 }
 
 ; Redundant load elimination across a @llvm.sideeffect.
-
-; CHECK-LABEL: rle
-; CHECK: load
-; CHECK-NOT: load
 define float @rle(ptr %p) {
-    %r = load float, ptr %p
-    call void @llvm.sideeffect()
-    %s = load float, ptr %p
-    %t = fadd float %r, %s
-    ret float %t
+; CHECK-LABEL: define float @rle(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = load float, ptr [[P]], align 4
+; CHECK-NEXT:    call void @llvm.sideeffect()
+; CHECK-NEXT:    [[T:%.*]] = fadd float [[R]], [[R]]
+; CHECK-NEXT:    ret float [[T]]
+;
+  %r = load float, ptr %p
+  call void @llvm.sideeffect()
+  %s = load float, ptr %p
+  %t = fadd float %r, %s
+  ret float %t
 }
 
 ; LICM across a @llvm.sideeffect.
-
-; CHECK-LABEL: licm
-; CHECK: load
-; CHECK: loop:
-; CHECK-NOT: load
 define float @licm(i64 %n, ptr nocapture readonly %p) #0 {
+; CHECK-LABEL: define float @licm(
+; CHECK-SAME: i64 [[N:%.*]], ptr readonly captures(none) [[P:%.*]]) {
+; CHECK-NEXT:  [[BB0:.*]]:
+; CHECK-NEXT:    [[T3_PRE:%.*]] = load float, ptr [[P]], align 4
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, %[[BB0]] ], [ [[T5:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi float [ 0.000000e+00, %[[BB0]] ], [ [[T4:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    call void @llvm.sideeffect()
+; CHECK-NEXT:    [[T4]] = fadd float [[SUM]], [[T3_PRE]]
+; CHECK-NEXT:    [[T5]] = add i64 [[I]], 1
+; CHECK-NEXT:    [[T6:%.*]] = icmp ult i64 [[T5]], [[N]]
+; CHECK-NEXT:    br i1 [[T6]], label %[[LOOP]], label %[[BB2:.*]]
+; CHECK:       [[BB2]]:
+; CHECK-NEXT:    ret float [[T4]]
+;
 bb0:
   br label %loop
 
diff --git a/llvm/test/Transforms/GVN/invariant.group.ll b/llvm/test/Transforms/GVN/invariant.group.ll
index 9c673ba..aba20ee 100644
--- a/llvm/test/Transforms/GVN/invariant.group.ll
+++ b/llvm/test/Transforms/GVN/invariant.group.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn -S | FileCheck %s
 
 %struct.A = type { ptr }
@@ -6,130 +7,175 @@
 
 @unknownPtr = external global i8
 
-; CHECK-LABEL: define i8 @simple() {
 define i8 @simple() {
+; CHECK-LABEL: define i8 @simple() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1, !invariant.group [[META0:![0-9]+]]
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    ret i8 42
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr, !invariant.group !0
-    call void @foo(ptr %ptr)
-
-    %a = load i8, ptr %ptr, !invariant.group !0
-    %b = load i8, ptr %ptr, !invariant.group !0
-    %c = load i8, ptr %ptr, !invariant.group !0
-; CHECK: ret i8 42
-    ret i8 %a
+  %ptr = alloca i8
+  store i8 42, ptr %ptr, !invariant.group !0
+  call void @foo(ptr %ptr)
+
+  %a = load i8, ptr %ptr, !invariant.group !0
+  %b = load i8, ptr %ptr, !invariant.group !0
+  %c = load i8, ptr %ptr, !invariant.group !0
+  ret i8 %a
 }
 
-; CHECK-LABEL: define i8 @optimizable1() {
 define i8 @optimizable1() {
+; CHECK-LABEL: define i8 @optimizable1() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    [[PTR2:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[PTR]])
+; CHECK-NEXT:    call void @foo(ptr [[PTR2]])
+; CHECK-NEXT:    ret i8 42
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr, !invariant.group !0
-    %ptr2 = call ptr @llvm.launder.invariant.group.p0(ptr %ptr)
-    %a = load i8, ptr %ptr, !invariant.group !0
-    
-    call void @foo(ptr %ptr2); call to use %ptr2
-; CHECK: ret i8 42
-    ret i8 %a
+  %ptr = alloca i8
+  store i8 42, ptr %ptr, !invariant.group !0
+  %ptr2 = call ptr @llvm.launder.invariant.group.p0(ptr %ptr)
+  %a = load i8, ptr %ptr, !invariant.group !0
+
+  call void @foo(ptr %ptr2); call to use %ptr2
+  ret i8 %a
 }
 
-; CHECK-LABEL: define i8 @optimizable2() {
 define i8 @optimizable2() {
+; CHECK-LABEL: define i8 @optimizable2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    store i8 13, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @bar(i8 13)
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    ret i8 42
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr, !invariant.group !0
-    call void @foo(ptr %ptr)
-    
-    store i8 13, ptr %ptr ; can't use this store with invariant.group
-    %a = load i8, ptr %ptr 
-    call void @bar(i8 %a) ; call to use %a
-    
-    call void @foo(ptr %ptr)
-    %b = load i8, ptr %ptr, !invariant.group !0
-    
-; CHECK: ret i8 42
-    ret i8 %b
+  %ptr = alloca i8
+  store i8 42, ptr %ptr, !invariant.group !0
+  call void @foo(ptr %ptr)
+
+  store i8 13, ptr %ptr ; can't use this store with invariant.group
+  %a = load i8, ptr %ptr
+  call void @bar(i8 %a) ; call to use %a
+
+  call void @foo(ptr %ptr)
+  %b = load i8, ptr %ptr, !invariant.group !0
+
+  ret i8 %b
 }
 
-; CHECK-LABEL: define i1 @proveEqualityForStrip(
-define i1 @proveEqualityForStrip(ptr %a) {
 ; FIXME: The first call could be also removed by GVN. Right now
 ; DCE removes it. The second call is CSE'd with the first one.
-; CHECK: %b1 = call ptr @llvm.strip.invariant.group.p0(ptr %a)
+define i1 @proveEqualityForStrip(ptr %a) {
+; CHECK-LABEL: define i1 @proveEqualityForStrip(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    [[B1:%.*]] = call ptr @llvm.strip.invariant.group.p0(ptr [[A]])
+; CHECK-NEXT:    ret i1 true
+;
   %b1 = call ptr @llvm.strip.invariant.group.p0(ptr %a)
-; CHECK-NOT: llvm.strip.invariant.group
   %b2 = call ptr @llvm.strip.invariant.group.p0(ptr %a)
   %r = icmp eq ptr %b1, %b2
-; CHECK: ret i1 true
   ret i1 %r
 }
-; CHECK-LABEL: define i8 @unoptimizable1() {
+
 define i8 @unoptimizable1() {
+; CHECK-LABEL: define i8 @unoptimizable1() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    [[A:%.*]] = load i8, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    ret i8 [[A]]
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr
-    call void @foo(ptr %ptr)
-    %a = load i8, ptr %ptr, !invariant.group !0
-; CHECK: ret i8 %a
-    ret i8 %a
+  %ptr = alloca i8
+  store i8 42, ptr %ptr
+  call void @foo(ptr %ptr)
+  %a = load i8, ptr %ptr, !invariant.group !0
+  ret i8 %a
 }
 
-; CHECK-LABEL: define void @indirectLoads() {
 define void @indirectLoads() {
+; CHECK-LABEL: define void @indirectLoads() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[A:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[CALL:%.*]] = call ptr @getPointer(ptr null)
+; CHECK-NEXT:    call void @_ZN1AC1Ev(ptr [[CALL]])
+; CHECK-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[CALL]], align 8, !invariant.group [[META0]]
+; CHECK-NEXT:    [[CMP_VTABLES:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2)
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_VTABLES]])
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[A]], align 8
+; CHECK-NEXT:    call void @_ZN1A3fooEv(ptr [[CALL]])
+; CHECK-NEXT:    call void @_ZN1A3fooEv(ptr [[CALL]])
+; CHECK-NEXT:    call void @_ZN1A3fooEv(ptr [[CALL]])
+; CHECK-NEXT:    call void @_ZN1A3fooEv(ptr [[CALL]])
+; CHECK-NEXT:    ret void
+;
 entry:
   %a = alloca ptr, align 8
-  
-  %call = call ptr @getPointer(ptr null) 
+
+  %call = call ptr @getPointer(ptr null)
   call void @_ZN1AC1Ev(ptr %call)
-  
-; CHECK: %vtable = load {{.*}} !invariant.group
+
   %vtable = load ptr, ptr %call, align 8, !invariant.group !0
   %cmp.vtables = icmp eq ptr %vtable, getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2)
   call void @llvm.assume(i1 %cmp.vtables)
-  
+
   store ptr %call, ptr %a, align 8
   %0 = load ptr, ptr %a, align 8
 
-; CHECK: call void @_ZN1A3fooEv(
   %vtable1 = load ptr, ptr %0, align 8, !invariant.group !0
   %1 = load ptr, ptr %vtable1, align 8
   call void %1(ptr %0)
   %2 = load ptr, ptr %a, align 8
 
-; CHECK: call void @_ZN1A3fooEv(
   %vtable2 = load ptr, ptr %2, align 8, !invariant.group !0
   %3 = load ptr, ptr %vtable2, align 8
-  
+
   call void %3(ptr %2)
   %4 = load ptr, ptr %a, align 8
-  
+
   %vtable4 = load ptr, ptr %4, align 8, !invariant.group !0
   %5 = load ptr, ptr %vtable4, align 8
-; CHECK: call void @_ZN1A3fooEv(
   call void %5(ptr %4)
- 
+
   %vtable5 = load ptr, ptr %call, align 8, !invariant.group !0
   %6 = load ptr, ptr %vtable5, align 8
-; CHECK: call void @_ZN1A3fooEv(
   call void %6(ptr %4)
-  
+
   ret void
 }
 
-; CHECK-LABEL: define void @combiningBitCastWithLoad() {
 define void @combiningBitCastWithLoad() {
+; CHECK-LABEL: define void @combiningBitCastWithLoad() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[A:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[CALL:%.*]] = call ptr @getPointer(ptr null)
+; CHECK-NEXT:    call void @_ZN1AC1Ev(ptr [[CALL]])
+; CHECK-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[CALL]], align 8, !invariant.group [[META0]]
+; CHECK-NEXT:    [[CMP_VTABLES:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2)
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[A]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; CHECK-NEXT:    call void [[TMP0]](ptr [[CALL]])
+; CHECK-NEXT:    ret void
+;
 entry:
   %a = alloca ptr, align 8
-  
-  %call = call ptr @getPointer(ptr null) 
+
+  %call = call ptr @getPointer(ptr null)
   call void @_ZN1AC1Ev(ptr %call)
-  
-; CHECK: %vtable = load {{.*}} !invariant.group
+
   %vtable = load ptr, ptr %call, align 8, !invariant.group !0
   %cmp.vtables = icmp eq ptr %vtable, getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2)
-  
+
   store ptr %call, ptr %a, align 8
-; CHECK-NOT: !invariant.group
   %0 = load ptr, ptr %a, align 8
 
   %vtable1 = load ptr, ptr %0, align 8, !invariant.group !0
@@ -139,185 +185,255 @@ entry:
   ret void
 }
 
-; CHECK-LABEL:define void @loadCombine() {
 define void @loadCombine() {
+; CHECK-LABEL: define void @loadCombine() {
+; CHECK-NEXT:  [[ENTER:.*:]]
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    [[A:%.*]] = load i8, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @bar(i8 [[A]])
+; CHECK-NEXT:    call void @bar(i8 [[A]])
+; CHECK-NEXT:    ret void
+;
 enter:
   %ptr = alloca i8
   store i8 42, ptr %ptr
   call void @foo(ptr %ptr)
-; CHECK: %[[A:.*]] = load i8, ptr %ptr, align 1, !invariant.group
   %a = load i8, ptr %ptr, !invariant.group !0
-; CHECK-NOT: load
   %b = load i8, ptr %ptr, !invariant.group !0
-; CHECK: call void @bar(i8 %[[A]])
   call void @bar(i8 %a)
-; CHECK: call void @bar(i8 %[[A]])
   call void @bar(i8 %b)
   ret void
 }
 
-; CHECK-LABEL: define void @loadCombine1() {
 define void @loadCombine1() {
+; CHECK-LABEL: define void @loadCombine1() {
+; CHECK-NEXT:  [[ENTER:.*:]]
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    [[C:%.*]] = load i8, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @bar(i8 [[C]])
+; CHECK-NEXT:    call void @bar(i8 [[C]])
+; CHECK-NEXT:    ret void
+;
 enter:
   %ptr = alloca i8
   store i8 42, ptr %ptr
   call void @foo(ptr %ptr)
-; CHECK: %[[D:.*]] = load i8, ptr %ptr, align 1, !invariant.group
   %c = load i8, ptr %ptr
-; CHECK-NOT: load
   %d = load i8, ptr %ptr, !invariant.group !0
-; CHECK: call void @bar(i8 %[[D]])
   call void @bar(i8 %c)
-; CHECK: call void @bar(i8 %[[D]])
   call void @bar(i8 %d)
   ret void
 }
 
-; CHECK-LABEL: define void @loadCombine2() {    
 define void @loadCombine2() {
+; CHECK-LABEL: define void @loadCombine2() {
+; CHECK-NEXT:  [[ENTER:.*:]]
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    [[E:%.*]] = load i8, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @bar(i8 [[E]])
+; CHECK-NEXT:    call void @bar(i8 [[E]])
+; CHECK-NEXT:    ret void
+;
 enter:
   %ptr = alloca i8
   store i8 42, ptr %ptr
   call void @foo(ptr %ptr)
-; CHECK: %[[E:.*]] = load i8, ptr %ptr, align 1, !invariant.group
   %e = load i8, ptr %ptr, !invariant.group !0
-; CHECK-NOT: load
   %f = load i8, ptr %ptr
-; CHECK: call void @bar(i8 %[[E]])
   call void @bar(i8 %e)
-; CHECK: call void @bar(i8 %[[E]])
   call void @bar(i8 %f)
   ret void
 }
 
-; CHECK-LABEL: define void @loadCombine3() {
 define void @loadCombine3() {
+; CHECK-LABEL: define void @loadCombine3() {
+; CHECK-NEXT:  [[ENTER:.*:]]
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    [[E:%.*]] = load i8, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @bar(i8 [[E]])
+; CHECK-NEXT:    call void @bar(i8 [[E]])
+; CHECK-NEXT:    ret void
+;
 enter:
   %ptr = alloca i8
   store i8 42, ptr %ptr
   call void @foo(ptr %ptr)
-; CHECK: %[[E:.*]] = load i8, ptr %ptr, align 1, !invariant.group
   %e = load i8, ptr %ptr, !invariant.group !0
-; CHECK-NOT: load
   %f = load i8, ptr %ptr, !invariant.group !0
-; CHECK: call void @bar(i8 %[[E]])
   call void @bar(i8 %e)
-; CHECK: call void @bar(i8 %[[E]])
   call void @bar(i8 %f)
   ret void
 }
 
-; CHECK-LABEL: define i8 @unoptimizable2() {
 define i8 @unoptimizable2() {
+; CHECK-LABEL: define i8 @unoptimizable2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    [[A:%.*]] = load i8, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    ret i8 [[A]]
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr
-    call void @foo(ptr %ptr)
-    %a = load i8, ptr %ptr
-    call void @foo(ptr %ptr)
-    %b = load i8, ptr %ptr, !invariant.group !0
-    
-; CHECK: ret i8 %a
-    ret i8 %a
+  %ptr = alloca i8
+  store i8 42, ptr %ptr
+  call void @foo(ptr %ptr)
+  %a = load i8, ptr %ptr
+  call void @foo(ptr %ptr)
+  %b = load i8, ptr %ptr, !invariant.group !0
+
+  ret i8 %a
 }
 
-; CHECK-LABEL: define i8 @unoptimizable3() {
 define i8 @unoptimizable3() {
+; CHECK-LABEL: define i8 @unoptimizable3() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    [[PTR2:%.*]] = call ptr @getPointer(ptr [[PTR]])
+; CHECK-NEXT:    [[A:%.*]] = load i8, ptr [[PTR2]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    ret i8 [[A]]
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr, !invariant.group !0
-    %ptr2 = call ptr @getPointer(ptr %ptr)
-    %a = load i8, ptr %ptr2, !invariant.group !0
-    
-; CHECK: ret i8 %a
-    ret i8 %a
+  %ptr = alloca i8
+  store i8 42, ptr %ptr, !invariant.group !0
+  %ptr2 = call ptr @getPointer(ptr %ptr)
+  %a = load i8, ptr %ptr2, !invariant.group !0
+
+  ret i8 %a
 }
 
-; CHECK-LABEL: define i8 @optimizable4() {
 define i8 @optimizable4() {
+; CHECK-LABEL: define i8 @optimizable4() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    [[PTR2:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[PTR]])
+; CHECK-NEXT:    ret i8 42
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr, !invariant.group !0
-    %ptr2 = call ptr @llvm.launder.invariant.group.p0(ptr %ptr)
-; CHECK-NOT: load
-    %a = load i8, ptr %ptr2, !invariant.group !0
-    
-; CHECK: ret i8 42
-    ret i8 %a
+  %ptr = alloca i8
+  store i8 42, ptr %ptr, !invariant.group !0
+  %ptr2 = call ptr @llvm.launder.invariant.group.p0(ptr %ptr)
+  %a = load i8, ptr %ptr2, !invariant.group !0
+
+  ret i8 %a
 }
 
-; CHECK-LABEL: define i8 @volatile1() {
 define i8 @volatile1() {
+; CHECK-LABEL: define i8 @volatile1() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    [[B:%.*]] = load volatile i8, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @bar(i8 [[B]])
+; CHECK-NEXT:    [[C:%.*]] = load volatile i8, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @bar(i8 [[C]])
+; CHECK-NEXT:    ret i8 42
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr, !invariant.group !0
-    call void @foo(ptr %ptr)
-    %a = load i8, ptr %ptr, !invariant.group !0
-    %b = load volatile i8, ptr %ptr
-; CHECK: call void @bar(i8 %b)
-    call void @bar(i8 %b)
-
-    %c = load volatile i8, ptr %ptr, !invariant.group !0
+  %ptr = alloca i8
+  store i8 42, ptr %ptr, !invariant.group !0
+  call void @foo(ptr %ptr)
+  %a = load i8, ptr %ptr, !invariant.group !0
+  %b = load volatile i8, ptr %ptr
+  call void @bar(i8 %b)
+
+  %c = load volatile i8, ptr %ptr, !invariant.group !0
 ; FIXME: we could change %c to 42, preserving volatile load
-; CHECK: call void @bar(i8 %c)
-    call void @bar(i8 %c)
-; CHECK: ret i8 42
-    ret i8 %a
+  call void @bar(i8 %c)
+  ret i8 %a
 }
 
-; CHECK-LABEL: define i8 @volatile2() {
 define i8 @volatile2() {
+; CHECK-LABEL: define i8 @volatile2() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    [[B:%.*]] = load volatile i8, ptr [[PTR]], align 1
+; CHECK-NEXT:    call void @bar(i8 [[B]])
+; CHECK-NEXT:    [[C:%.*]] = load volatile i8, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @bar(i8 [[C]])
+; CHECK-NEXT:    ret i8 42
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr, !invariant.group !0
-    call void @foo(ptr %ptr)
-    %a = load i8, ptr %ptr, !invariant.group !0
-    %b = load volatile i8, ptr %ptr
-; CHECK: call void @bar(i8 %b)
-    call void @bar(i8 %b)
-
-    %c = load volatile i8, ptr %ptr, !invariant.group !0
+  %ptr = alloca i8
+  store i8 42, ptr %ptr, !invariant.group !0
+  call void @foo(ptr %ptr)
+  %a = load i8, ptr %ptr, !invariant.group !0
+  %b = load volatile i8, ptr %ptr
+  call void @bar(i8 %b)
+
+  %c = load volatile i8, ptr %ptr, !invariant.group !0
 ; FIXME: we could change %c to 42, preserving volatile load
-; CHECK: call void @bar(i8 %c)
-    call void @bar(i8 %c)
-; CHECK: ret i8 42
-    ret i8 %a
+  call void @bar(i8 %c)
+  ret i8 %a
 }
 
-; CHECK-LABEL: define i8 @fun() {
 define i8 @fun() {
+; CHECK-LABEL: define i8 @fun() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 42, ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @foo(ptr [[PTR]])
+; CHECK-NEXT:    call void @bar(i8 42)
+; CHECK-NEXT:    [[NEWPTR:%.*]] = call ptr @getPointer(ptr [[PTR]])
+; CHECK-NEXT:    [[C:%.*]] = load i8, ptr [[NEWPTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @bar(i8 [[C]])
+; CHECK-NEXT:    [[UNKNOWNVALUE:%.*]] = load i8, ptr @unknownPtr, align 1
+; CHECK-NEXT:    store i8 [[UNKNOWNVALUE]], ptr [[PTR]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    [[NEWPTR2:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[PTR]])
+; CHECK-NEXT:    ret i8 [[UNKNOWNVALUE]]
+;
 entry:
-    %ptr = alloca i8
-    store i8 42, ptr %ptr, !invariant.group !0
-    call void @foo(ptr %ptr)
-
-    %a = load i8, ptr %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change
-; CHECK: call void @bar(i8 42)
-    call void @bar(i8 %a)
-
-    %newPtr = call ptr @getPointer(ptr %ptr) 
-    %c = load i8, ptr %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr
-; CHECK: call void @bar(i8 %c)
-    call void @bar(i8 %c)
-    
-    %unknownValue = load i8, ptr @unknownPtr
+  %ptr = alloca i8
+  store i8 42, ptr %ptr, !invariant.group !0
+  call void @foo(ptr %ptr)
+
+  %a = load i8, ptr %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change
+  call void @bar(i8 %a)
+
+  %newPtr = call ptr @getPointer(ptr %ptr)
+  %c = load i8, ptr %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr
+  call void @bar(i8 %c)
+
+  %unknownValue = load i8, ptr @unknownPtr
 ; FIXME: Can assume that %unknownValue == 42
-; CHECK: store i8 %unknownValue, ptr %ptr, align 1, !invariant.group !0
-    store i8 %unknownValue, ptr %ptr, !invariant.group !0 
-
-    %newPtr2 = call ptr @llvm.launder.invariant.group.p0(ptr %ptr)
-; CHECK-NOT: load
-    %d = load i8, ptr %newPtr2, !invariant.group !0
-; CHECK: ret i8 %unknownValue
-    ret i8 %d
+  store i8 %unknownValue, ptr %ptr, !invariant.group !0
+
+  %newPtr2 = call ptr @llvm.launder.invariant.group.p0(ptr %ptr)
+  %d = load i8, ptr %newPtr2, !invariant.group !0
+  ret i8 %d
 }
 
 ; This test checks if invariant.group understands gep with zeros
-; CHECK-LABEL: define void @testGEP0() {
 define void @testGEP0() {
+; CHECK-LABEL: define void @testGEP0() {
+; CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 8
+; CHECK-NEXT:    store ptr getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2), ptr [[A]], align 8, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @_ZN1A3fooEv(ptr nonnull dereferenceable(8) [[A]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr @unknownPtr, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[_Z1GR1A_EXIT:.*]], label %[[BB3:.*]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    call void @_ZN1A3fooEv(ptr nonnull [[A]])
+; CHECK-NEXT:    br label %[[_Z1GR1A_EXIT]]
+; CHECK:       [[_Z1GR1A_EXIT]]:
+; CHECK-NEXT:    ret void
+;
   %a = alloca %struct.A, align 8
   store ptr getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2), ptr %a, align 8, !invariant.group !0
-; CHECK: call void @_ZN1A3fooEv(ptr nonnull dereferenceable(8) %a)
   call void @_ZN1A3fooEv(ptr nonnull dereferenceable(8) %a) ; This call may change vptr
   %1 = load i8, ptr @unknownPtr, align 4
   %2 = icmp eq i8 %1, 0
@@ -326,7 +442,6 @@ define void @testGEP0() {
 ; This should be devirtualized by invariant.group
   %4 = load ptr, ptr %a, align 8, !invariant.group !0
   %5 = load ptr, ptr %4, align 8
-; CHECK: call void @_ZN1A3fooEv(ptr nonnull %a)
   call void %5(ptr nonnull %a)
   br label %_Z1gR1A.exit
 
@@ -337,51 +452,86 @@ _Z1gR1A.exit:                                     ; preds = %0, %3
 ; Check if no optimizations are performed with global pointers.
 ; FIXME: we could do the optimizations if we would check if dependency comes
 ; from the same function.
-; CHECK-LABEL: define void @testGlobal() {
 define void @testGlobal() {
-; CHECK:  %a = load i8, ptr @unknownPtr, align 1, !invariant.group !0
-   %a = load i8, ptr @unknownPtr, !invariant.group !0
-   call void @foo2(ptr @unknownPtr, i8 %a)
-; CHECK:  %1 = load i8, ptr @unknownPtr, align 1, !invariant.group !0
-   %1 = load i8, ptr @unknownPtr, !invariant.group !0
-   call void @bar(i8 %1)
-
-   call void @fooBit(ptr @unknownPtr, i1 1)
+; CHECK-LABEL: define void @testGlobal() {
+; CHECK-NEXT:    [[A:%.*]] = load i8, ptr @unknownPtr, align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @foo2(ptr @unknownPtr, i8 [[A]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr @unknownPtr, align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @bar(i8 [[TMP1]])
+; CHECK-NEXT:    call void @fooBit(ptr @unknownPtr, i1 true)
+; CHECK-NEXT:    [[TMP2:%.*]] = load i1, ptr @unknownPtr, align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @fooBit(ptr @unknownPtr, i1 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = load i1, ptr @unknownPtr, align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @fooBit(ptr @unknownPtr, i1 [[TMP3]])
+; CHECK-NEXT:    ret void
+;
+  %a = load i8, ptr @unknownPtr, !invariant.group !0
+  call void @foo2(ptr @unknownPtr, i8 %a)
+  %1 = load i8, ptr @unknownPtr, !invariant.group !0
+  call void @bar(i8 %1)
+
+  call void @fooBit(ptr @unknownPtr, i1 1)
 ; Adding regex because of canonicalization of bitcasts
-; CHECK: %2 = load i1, ptr {{.*}}, !invariant.group !0
-   %2 = load i1, ptr @unknownPtr, !invariant.group !0
-   call void @fooBit(ptr @unknownPtr, i1 %2)
-; CHECK:  %3 = load i1, ptr {{.*}}, !invariant.group !0
-   %3 = load i1, ptr @unknownPtr, !invariant.group !0
-   call void @fooBit(ptr @unknownPtr, i1 %3)
-   ret void
+  %2 = load i1, ptr @unknownPtr, !invariant.group !0
+  call void @fooBit(ptr @unknownPtr, i1 %2)
+  %3 = load i1, ptr @unknownPtr, !invariant.group !0
+  call void @fooBit(ptr @unknownPtr, i1 %3)
+  ret void
 }
 ; And in the case it is not global
-; CHECK-LABEL: define void @testNotGlobal() {
 define void @testNotGlobal() {
-   %a = alloca i8
-   call void @foo(ptr %a)
-; CHECK:  %b = load i8, ptr %a, align 1, !invariant.group !0
-   %b = load i8, ptr %a, !invariant.group !0
-   call void @foo2(ptr %a, i8 %b)
-
-   %1 = load i8, ptr %a, !invariant.group !0
-; CHECK: call void @bar(i8 %b)
-   call void @bar(i8 %1)
-
-   call void @fooBit(ptr %a, i1 1)
-; CHECK: %1 = trunc i8 %b to i1
-   %2 = load i1, ptr %a, !invariant.group !0
-; CHECK-NEXT: call void @fooBit(ptr %a, i1 %1)
-   call void @fooBit(ptr %a, i1 %2)
-   %3 = load i1, ptr %a, !invariant.group !0
-; CHECK-NEXT: call void @fooBit(ptr %a, i1 %1)
-   call void @fooBit(ptr %a, i1 %3)
-   ret void
+; CHECK-LABEL: define void @testNotGlobal() {
+; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @foo(ptr [[A]])
+; CHECK-NEXT:    [[B:%.*]] = load i8, ptr [[A]], align 1, !invariant.group [[META0]]
+; CHECK-NEXT:    call void @foo2(ptr [[A]], i8 [[B]])
+; CHECK-NEXT:    call void @bar(i8 [[B]])
+; CHECK-NEXT:    call void @fooBit(ptr [[A]], i1 true)
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i8 [[B]] to i1
+; CHECK-NEXT:    call void @fooBit(ptr [[A]], i1 [[TMP1]])
+; CHECK-NEXT:    call void @fooBit(ptr [[A]], i1 [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %a = alloca i8
+  call void @foo(ptr %a)
+  %b = load i8, ptr %a, !invariant.group !0
+  call void @foo2(ptr %a, i8 %b)
+
+  %1 = load i8, ptr %a, !invariant.group !0
+  call void @bar(i8 %1)
+
+  call void @fooBit(ptr %a, i1 1)
+  %2 = load i1, ptr %a, !invariant.group !0
+  call void @fooBit(ptr %a, i1 %2)
+  %3 = load i1, ptr %a, !invariant.group !0
+  call void @fooBit(ptr %a, i1 %3)
+  ret void
 }
 
-; CHECK-LABEL: define void @handling_loops()
 define void @handling_loops() {
+; CHECK-LABEL: define void @handling_loops() {
+; CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 8
+; CHECK-NEXT:    store ptr getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2), ptr [[A]], align 8, !invariant.group [[META0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr @unknownPtr, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i8 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], [[DOTLR_PH_I:label %.*]], label %[[_Z2G2R1A_EXIT:.*]]
+; CHECK:       [[_LR_PH_I:.*:]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i8 [[TMP1]], 1
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[DOT_CRIT_EDGE_PREHEADER:.*]], label %[[_Z2G2R1A_EXIT]]
+; CHECK:       [[__CRIT_EDGE_PREHEADER:.*:]]
+; CHECK-NEXT:    br label %[[DOT_CRIT_EDGE:.*]]
+; CHECK:       [[__CRIT_EDGE:.*:]]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i8 [ [[TMP5:%.*]], %[[DOT_CRIT_EDGE]] ], [ 1, %[[DOT_CRIT_EDGE_PREHEADER]] ]
+; CHECK-NEXT:    call void @_ZN1A3fooEv(ptr nonnull [[A]])
+; CHECK-NEXT:    [[TMP5]] = add nuw nsw i8 [[TMP4]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = load i8, ptr @unknownPtr, align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt i8 [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[DOT_CRIT_EDGE]], label %[[_Z2G2R1A_EXIT_LOOPEXIT:.*]]
+; CHECK:       [[_Z2G2R1A_EXIT_LOOPEXIT]]:
+; CHECK-NEXT:    br label %[[_Z2G2R1A_EXIT]]
+; CHECK:       [[_Z2G2R1A_EXIT]]:
+; CHECK-NEXT:    ret void
+;
   %a = alloca %struct.A, align 8
   store ptr getelementptr inbounds ([3 x ptr], ptr @_ZTV1A, i64 0, i64 2), ptr %a, align 8, !invariant.group !0
   %1 = load i8, ptr @unknownPtr, align 4
@@ -400,9 +550,7 @@ define void @handling_loops() {
   %5 = phi i8 [ %7, %._crit_edge ], [ 1, %._crit_edge.preheader ]
   %.pre = load ptr, ptr %a, align 8, !invariant.group !0
   %6 = load ptr, ptr %.pre, align 8
-  ; CHECK: call void @_ZN1A3fooEv(ptr nonnull %a)
   call void %6(ptr nonnull %a) #3
-  ; CHECK-NOT: call void %
   %7 = add nuw nsw i8 %5, 1
   %8 = load i8, ptr @unknownPtr, align 4
   %9 = icmp slt i8 %7, %8
@@ -432,3 +580,6 @@ declare void @llvm.assume(i1 %cmp.vtables)
 
 
 !0 = !{}
+;.
+; CHECK: [[META0]] = !{}
+;.
diff --git a/llvm/test/Transforms/GVN/invariant.start.ll b/llvm/test/Transforms/GVN/invariant.start.ll
index f2d7dd0..6f38197 100644
--- a/llvm/test/Transforms/GVN/invariant.start.ll
+++ b/llvm/test/Transforms/GVN/invariant.start.ll
@@ -1,16 +1,19 @@
-; Test to make sure llvm.invariant.start calls are not treated as clobbers.
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn -S | FileCheck %s
 
+; Test to make sure llvm.invariant.start calls are not treated as clobbers.
 
 declare ptr @llvm.invariant.start.p0(i64, ptr nocapture) nounwind readonly
 declare void @llvm.invariant.end.p0(ptr, i64, ptr nocapture) nounwind
 
 ; We forward store to the load across the invariant.start intrinsic
 define i8 @forward_store() {
-; CHECK-LABEL: @forward_store
-; CHECK: call ptr @llvm.invariant.start.p0(i64 1, ptr %a)
-; CHECK-NOT: load
-; CHECK: ret i8 0
+; CHECK-LABEL: define i8 @forward_store() {
+; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 0, ptr [[A]], align 1
+; CHECK-NEXT:    [[I:%.*]] = call ptr @llvm.invariant.start.p0(i64 1, ptr [[A]])
+; CHECK-NEXT:    ret i8 0
+;
   %a = alloca i8
   store i8 0, ptr %a
   %i = call ptr @llvm.invariant.start.p0(i64 1, ptr %a)
@@ -23,10 +26,18 @@ declare i8 @dummy(ptr nocapture) nounwind readonly
 ; We forward store to the load in the non-local analysis case,
 ; i.e. invariant.start is in another basic block.
 define i8 @forward_store_nonlocal(i1 %cond) {
-; CHECK-LABEL: forward_store_nonlocal
-; CHECK: call ptr @llvm.invariant.start.p0(i64 1, ptr %a)
-; CHECK: ret i8 0
-; CHECK: ret i8 %val
+; CHECK-LABEL: define i8 @forward_store_nonlocal(
+; CHECK-SAME: i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 0, ptr [[A]], align 1
+; CHECK-NEXT:    [[I:%.*]] = call ptr @llvm.invariant.start.p0(i64 1, ptr [[A]])
+; CHECK-NEXT:    br i1 [[COND]], label %[[LOADBLOCK:.*]], label %[[EXIT:.*]]
+; CHECK:       [[LOADBLOCK]]:
+; CHECK-NEXT:    ret i8 0
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[VAL:%.*]] = call i8 @dummy(ptr [[A]])
+; CHECK-NEXT:    ret i8 [[VAL]]
+;
   %a = alloca i8
   store i8 0, ptr %a
   %i = call ptr @llvm.invariant.start.p0(i64 1, ptr %a)
@@ -43,12 +54,14 @@ exit:
 
 ; We should not value forward %foo to the invariant.end corresponding to %bar.
 define i8 @forward_store1() {
-; CHECK-LABEL: forward_store1
-; CHECK: %foo = call ptr @llvm.invariant.start.p0
-; CHECK-NOT: load
-; CHECK: %bar = call ptr @llvm.invariant.start.p0
-; CHECK: call void @llvm.invariant.end.p0(ptr %bar, i64 1, ptr %a)
-; CHECK: ret i8 0
+; CHECK-LABEL: define i8 @forward_store1() {
+; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    store i8 0, ptr [[A]], align 1
+; CHECK-NEXT:    [[FOO:%.*]] = call ptr @llvm.invariant.start.p0(i64 1, ptr [[A]])
+; CHECK-NEXT:    [[BAR:%.*]] = call ptr @llvm.invariant.start.p0(i64 1, ptr [[A]])
+; CHECK-NEXT:    call void @llvm.invariant.end.p0(ptr [[BAR]], i64 1, ptr [[A]])
+; CHECK-NEXT:    ret i8 0
+;
   %a = alloca i8
   store i8 0, ptr %a
   %foo = call ptr @llvm.invariant.start.p0(i64 1, ptr %a)
diff --git a/llvm/test/Transforms/GVN/load-constant-mem.ll b/llvm/test/Transforms/GVN/load-constant-mem.ll
index d5858d6..f5b0d7c 100644
--- a/llvm/test/Transforms/GVN/load-constant-mem.ll
+++ b/llvm/test/Transforms/GVN/load-constant-mem.ll
@@ -1,19 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn,instcombine -S | FileCheck %s
+
 ; PR4189
 @G = external constant [4 x i32]
 
 define i32 @test(ptr %p, i32 %i) nounwind {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    store i8 4, ptr [[P]], align 1
+; CHECK-NEXT:    ret i32 0
+;
 entry:
-	%P = getelementptr [4 x i32], ptr @G, i32 0, i32 %i
-	%A = load i32, ptr %P
-	store i8 4, ptr %p
-	%B = load i32, ptr %P
-	%C = sub i32 %A, %B
-	ret i32 %C
+  %P = getelementptr [4 x i32], ptr @G, i32 0, i32 %i
+  %A = load i32, ptr %P
+  store i8 4, ptr %p
+  %B = load i32, ptr %P
+  %C = sub i32 %A, %B
+  ret i32 %C
 }
-
-; CHECK: define i32 @test(ptr %p, i32 %i) #0 {
-; CHECK-NEXT: entry:
-; CHECK-NEXT:   store i8 4, ptr %p, align 1
-; CHECK-NEXT:   ret i32 0
-; CHECK-NEXT: }
diff --git a/llvm/test/Transforms/GVN/load-from-unreachable-predecessor.ll b/llvm/test/Transforms/GVN/load-from-unreachable-predecessor.ll
index 6ad0f59..c0b20d3 100644
--- a/llvm/test/Transforms/GVN/load-from-unreachable-predecessor.ll
+++ b/llvm/test/Transforms/GVN/load-from-unreachable-predecessor.ll
@@ -1,12 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S < %s | FileCheck %s
 
 ; Check that an unreachable predecessor to a PHI node doesn't cause a crash.
-; PR21625.
-
+; PR21625. The first load should be removed, since it's ignored.
 define i32 @f(ptr %f) {
-; CHECK: bb0:
-; Load should be removed, since it's ignored.
-; CHECK-NEXT: br label
+; CHECK-LABEL: define i32 @f(
+; CHECK-SAME: ptr [[F:%.*]]) {
+; CHECK-NEXT:  [[BB0:.*]]:
+; CHECK-NEXT:    br label %[[BB2:.*]]
+; CHECK:       [[BB1:.*]]:
+; CHECK-NEXT:    [[ZED:%.*]] = load ptr, ptr [[F]], align 8
+; CHECK-NEXT:    br i1 false, label %[[BB1]], label %[[BB2]]
+; CHECK:       [[BB2]]:
+; CHECK-NEXT:    [[FOO:%.*]] = phi ptr [ null, %[[BB0]] ], [ [[ZED]], %[[BB1]] ]
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = load i32, ptr [[FOO]], align 4
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
 bb0:
   %bar = load ptr, ptr %f
   br label %bb2
diff --git a/llvm/test/Transforms/GVN/malloc-load-removal.ll b/llvm/test/Transforms/GVN/malloc-load-removal.ll
index 0aa4beb..c86990f 100644
--- a/llvm/test/Transforms/GVN/malloc-load-removal.ll
+++ b/llvm/test/Transforms/GVN/malloc-load-removal.ll
@@ -1,4 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -S -passes=gvn < %s | FileCheck %s
+
 ; PR13694
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -6,6 +8,17 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 declare noalias ptr @malloc(i64) nounwind allockind("alloc,uninitialized") allocsize(0)
 
 define noalias ptr @test1() nounwind uwtable ssp {
+; CHECK-LABEL: define noalias ptr @test1(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @malloc(i64 100) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    br i1 undef, label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    store i8 0, ptr [[CALL]], align 1
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    ret ptr [[CALL]]
+;
 entry:
   %call = tail call ptr @malloc(i64 100) nounwind
   %0 = load i8, ptr %call, align 1
@@ -18,19 +31,22 @@ if.then:                                          ; preds = %entry
 
 if.end:                                           ; preds = %if.then, %entry
   ret ptr %call
-
-; CHECK-LABEL: @test1(
-; CHECK-NOT: load
-; CHECK-NOT: icmp
-
-; CHECK_NO_LIBCALLS-LABEL: @test1(
-; CHECK_NO_LIBCALLS: load
-; CHECK_NO_LIBCALLS: icmp
 }
 
 declare noalias ptr @_Znwm(i64) nounwind
 
 define noalias ptr @test2() nounwind uwtable ssp {
+; CHECK-LABEL: define noalias ptr @test2(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @_Znwm(i64 100) #[[ATTR2]]
+; CHECK-NEXT:    br i1 undef, label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    store i8 0, ptr [[CALL]], align 1
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    ret ptr [[CALL]]
+;
 entry:
   %call = tail call ptr @_Znwm(i64 100) nounwind
   %0 = load i8, ptr %call, align 1
@@ -43,19 +59,22 @@ if.then:                                          ; preds = %entry
 
 if.end:                                           ; preds = %if.then, %entry
   ret ptr %call
-
-; CHECK-LABEL: @test2(
-; CHECK-NOT: load
-; CHECK-NOT: icmp
-
-; CHECK_NO_LIBCALLS-LABEL: @test2(
-; CHECK_NO_LIBCALLS: load
-; CHECK_NO_LIBCALLS: icmp
 }
 
 declare noalias ptr @aligned_alloc(i64 allocalign, i64) nounwind allockind("alloc,uninitialized,aligned") allocsize(1)
 
 define noalias ptr @test3() nounwind uwtable ssp {
+; CHECK-LABEL: define noalias ptr @test3(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @aligned_alloc(i64 256, i64 32) #[[ATTR2]]
+; CHECK-NEXT:    br i1 undef, label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    store i8 0, ptr [[CALL]], align 1
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    ret ptr [[CALL]]
+;
 entry:
   %call = tail call ptr @aligned_alloc(i64 256, i64 32) nounwind
   %0 = load i8, ptr %call, align 32
@@ -68,12 +87,4 @@ if.then:                                          ; preds = %entry
 
 if.end:                                           ; preds = %if.then, %entry
   ret ptr %call
-
-; CHECK-LABEL: @test3(
-; CHECK-NOT: load
-; CHECK-NOT: icmp
-
-; CHECK_NO_LIBCALLS-LABEL: @test3(
-; CHECK_NO_LIBCALLS: load
-; CHECK_NO_LIBCALLS: icmp
 }
diff --git a/llvm/test/Transforms/GVN/mssa-update-dead-def.ll b/llvm/test/Transforms/GVN/mssa-update-dead-def.ll
index ad71a04..1a5b704 100644
--- a/llvm/test/Transforms/GVN/mssa-update-dead-def.ll
+++ b/llvm/test/Transforms/GVN/mssa-update-dead-def.ll
@@ -1,12 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes='require<memoryssa>,gvn' -verify-memoryssa -S %s | FileCheck %s
 
 ; This is a regression test for a bug in MemorySSA updater.
 ; Make sure that we don't crash and end up with a valid MemorySSA.
 
-; CHECK: @test()
 define void @test() personality ptr null {
+; CHECK-LABEL: define void @test() personality ptr null {
+; CHECK-NEXT:    invoke void @bar()
+; CHECK-NEXT:            to label %[[BAR_NORMAL:.*]] unwind label %[[EXCEPTIONAL:.*]]
+; CHECK:       [[BAR_NORMAL]]:
+; CHECK-NEXT:    ret void
+; CHECK:       [[DEAD_BLOCK:.*:]]
+; CHECK-NEXT:    invoke void @baz()
+; CHECK-NEXT:            to label %[[BAZ_NORMAL:.*]] unwind label %[[EXCEPTIONAL]]
+; CHECK:       [[BAZ_NORMAL]]:
+; CHECK-NEXT:    ret void
+; CHECK:       [[EXCEPTIONAL]]:
+; CHECK-NEXT:    [[TMP9:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            cleanup
+; CHECK-NEXT:    call void @foo()
+; CHECK-NEXT:    ret void
+;
   invoke void @bar()
-          to label %bar.normal unwind label %exceptional
+  to label %bar.normal unwind label %exceptional
 
 bar.normal:
   ret void
@@ -16,14 +32,14 @@ dead.block:
 
 baz.invoke:
   invoke void @baz()
-          to label %baz.normal unwind label %exceptional
+  to label %baz.normal unwind label %exceptional
 
 baz.normal:
   ret void
 
 exceptional:
   %tmp9 = landingpad { ptr, i32 }
-          cleanup
+  cleanup
   call void @foo()
   ret void
 }
diff --git a/llvm/test/Transforms/GVN/no-mem-dep-info.ll b/llvm/test/Transforms/GVN/no-mem-dep-info.ll
index 0380b7e..5f67902 100644
--- a/llvm/test/Transforms/GVN/no-mem-dep-info.ll
+++ b/llvm/test/Transforms/GVN/no-mem-dep-info.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt %s -passes=gvn -S -enable-gvn-memdep=false | FileCheck %s
 ; RUN: opt %s -passes=gvn -S -enable-gvn-memdep=true | FileCheck %s
 
@@ -11,6 +12,17 @@ declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, ptr, <8 x i32>,
 
 ; Function Attrs: nounwind
 define <8 x float> @foo1(ptr noalias readonly %arr.ptr, ptr noalias readonly %vix.ptr, ptr noalias %t2.ptr) #1 {
+; CHECK-LABEL: define <8 x float> @foo1(
+; CHECK-SAME: ptr noalias readonly [[ARR_PTR:%.*]], ptr noalias readonly [[VIX_PTR:%.*]], ptr noalias [[T2_PTR:%.*]]) {
+; CHECK-NEXT:  [[ALLOCAS:.*:]]
+; CHECK-NEXT:    [[VIX:%.*]] = load <8 x i32>, ptr [[VIX_PTR]], align 4
+; CHECK-NEXT:    [[T1_PTR:%.*]] = getelementptr i8, ptr [[ARR_PTR]], i8 4
+; CHECK-NEXT:    [[V1:%.*]] = tail call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, ptr [[ARR_PTR]], <8 x i32> [[VIX]], <8 x float> splat (float 0xFFFFFFFFE0000000), i8 1)
+; CHECK-NEXT:    store i8 1, ptr [[T1_PTR]], align 4
+; CHECK-NEXT:    [[V2:%.*]] = tail call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, ptr [[ARR_PTR]], <8 x i32> [[VIX]], <8 x float> splat (float 0xFFFFFFFFE0000000), i8 1)
+; CHECK-NEXT:    [[RES:%.*]] = fadd <8 x float> [[V1]], [[V2]]
+; CHECK-NEXT:    ret <8 x float> [[RES]]
+;
 allocas:
   %vix = load <8 x i32>, ptr %vix.ptr, align 4
   %t1.ptr = getelementptr i8, ptr %arr.ptr, i8 4
@@ -23,7 +35,3 @@ allocas:
 
   ret <8 x float> %res
 }
-; CHECK: foo1
-; CHECK: llvm.x86.avx2.gather.d.ps.256
-; CHECK: store
-; CHECK: llvm.x86.avx2.gather.d.ps.256
diff --git a/llvm/test/Transforms/GVN/noalias.ll b/llvm/test/Transforms/GVN/noalias.ll
index 98cc930..f28023d 100644
--- a/llvm/test/Transforms/GVN/noalias.ll
+++ b/llvm/test/Transforms/GVN/noalias.ll
@@ -1,9 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S < %s | FileCheck %s
 
 define i32 @test1(ptr %p, ptr %q) {
-; CHECK-LABEL: @test1(ptr %p, ptr %q)
-; CHECK: load i32, ptr %p, align 4, !noalias ![[SCOPE1:[0-9]+]]
-; CHECK: %c = add i32 %a, %a
+; CHECK-LABEL: define i32 @test1(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[P]], align 4, !noalias [[META0:![0-9]+]]
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[A]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %a = load i32, ptr %p, !noalias !3
   %b = load i32, ptr %p
   %c = add i32 %a, %b
@@ -11,9 +15,12 @@ define i32 @test1(ptr %p, ptr %q) {
 }
 
 define i32 @test2(ptr %p, ptr %q) {
-; CHECK-LABEL: @test2(ptr %p, ptr %q)
-; CHECK: load i32, ptr %p, align 4, !alias.scope ![[SCOPE1]]
-; CHECK: %c = add i32 %a, %a
+; CHECK-LABEL: define i32 @test2(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[P]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[A]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %a = load i32, ptr %p, !alias.scope !3
   %b = load i32, ptr %p, !alias.scope !3
   %c = add i32 %a, %b
@@ -21,17 +28,18 @@ define i32 @test2(ptr %p, ptr %q) {
 }
 
 define i32 @test3(ptr %p, ptr %q) {
-; CHECK-LABEL: @test3(ptr %p, ptr %q)
-; CHECK: load i32, ptr %p, align 4, !alias.scope ![[SCOPE2:[0-9]+]]
-; CHECK: %c = add i32 %a, %a
+; CHECK-LABEL: define i32 @test3(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[P]], align 4, !alias.scope [[META3:![0-9]+]]
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[A]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
   %a = load i32, ptr %p, !alias.scope !4
   %b = load i32, ptr %p, !alias.scope !5
   %c = add i32 %a, %b
   ret i32 %c
 }
 
-; CHECK:   ![[SCOPE1]] = !{!{{[0-9]+}}}
-; CHECK:   ![[SCOPE2]] = !{!{{[0-9]+}}}
 declare i32 @foo(ptr) readonly
 
 !0 = distinct !{!0, !2, !"callee0: %a"}
@@ -41,3 +49,10 @@ declare i32 @foo(ptr) readonly
 !3 = !{!0}
 !4 = !{!1}
 !5 = !{!0, !1}
+;.
+; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
+; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], !"callee0: %a"}
+; CHECK: [[META2]] = distinct !{[[META2]], !"callee0"}
+; CHECK: [[META3]] = !{[[META4:![0-9]+]]}
+; CHECK: [[META4]] = distinct !{[[META4]], [[META2]], !"callee0: %b"}
+;.
diff --git a/llvm/test/Transforms/GVN/non-local-offset.ll b/llvm/test/Transforms/GVN/non-local-offset.ll
index 0467657..19b571e 100644
--- a/llvm/test/Transforms/GVN/non-local-offset.ll
+++ b/llvm/test/Transforms/GVN/non-local-offset.ll
@@ -1,16 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64"
 
 ; GVN should ignore the store to p[1] to see that the load from p[0] is
 ; fully redundant.
-
-; CHECK-LABEL: @yes(
-; CHECK: if.then:
-; CHECK-NEXT: store i32 0, ptr %q
-; CHECK-NEXT: ret void
-
 define void @yes(i1 %c, ptr %p, ptr %q) nounwind {
+; CHECK-LABEL: define void @yes(
+; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
+; CHECK-NEXT:    store i32 1, ptr [[P1]], align 4
+; CHECK-NEXT:    br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    store i32 0, ptr [[Q]], align 4
+; CHECK-NEXT:    ret void
+; CHECK:       [[IF_ELSE]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   store i32 0, ptr %p
   %p1 = getelementptr inbounds i32, ptr %p, i64 1
@@ -29,16 +37,22 @@ if.else:
 ; GVN should ignore the store to p[1] to see that the first load from p[0] is
 ; fully redundant. However, the second load is larger, so it's not a simple
 ; redundancy.
-
-; CHECK-LABEL: @watch_out_for_size_change(
-; CHECK: if.then:
-; CHECK-NEXT: store i32 0, ptr %q
-; CHECK-NEXT: ret void
-; CHECK: if.else:
-; CHECK: load i64, ptr %p
-; CHECK: store i64
-
 define void @watch_out_for_size_change(i1 %c, ptr %p, ptr %q) nounwind {
+; CHECK-LABEL: define void @watch_out_for_size_change(
+; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
+; CHECK-NEXT:    store i32 1, ptr [[P1]], align 4
+; CHECK-NEXT:    br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    store i32 0, ptr [[Q]], align 4
+; CHECK-NEXT:    ret void
+; CHECK:       [[IF_ELSE]]:
+; CHECK-NEXT:    [[T64:%.*]] = load i64, ptr [[P]], align 4
+; CHECK-NEXT:    store i64 [[T64]], ptr [[Q]], align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   store i32 0, ptr %p
   %p1 = getelementptr inbounds i32, ptr %p, i64 1
diff --git a/llvm/test/Transforms/GVN/nonescaping-malloc.ll b/llvm/test/Transforms/GVN/nonescaping-malloc.ll
index 76d8cda..f67c958 100644
--- a/llvm/test/Transforms/GVN/nonescaping-malloc.ll
+++ b/llvm/test/Transforms/GVN/nonescaping-malloc.ll
@@ -1,5 +1,7 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; REQUIRES: asserts
 ; RUN: opt < %s -passes=gvn -stats -disable-output 2>&1 | FileCheck %s
+
 ; rdar://7363102
 
 ; CHECK: Number of loads deleted
@@ -102,3 +104,5 @@ _ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueIS1_EERNS_14StringM
 }
 
 declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/Transforms/GVN/null-aliases-nothing.ll b/llvm/test/Transforms/GVN/null-aliases-nothing.ll
index dc4ff406..81d44ce 100644
--- a/llvm/test/Transforms/GVN/null-aliases-nothing.ll
+++ b/llvm/test/Transforms/GVN/null-aliases-nothing.ll
@@ -1,19 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn -S | FileCheck %s
 
 %t = type { i32 }
 declare void @test1f(ptr)
 
-define void @test1(ptr noalias %stuff ) {
-    %before = load i32, ptr %stuff
-
-    call void @test1f(ptr null)
-
-    %after = load i32, ptr %stuff ; <--- This should be a dead load
-    %sum = add i32 %before, %after
-
-    store i32 %sum, ptr %stuff
-    ret void
-; CHECK: load
-; CHECK-NOT: load
-; CHECK: ret void
+; `%stuff` is noalias and `test1f` receives only null, so the call cannot
+; clobber `%stuff`; thus the second load is dead.
+define void @test1(ptr noalias %stuff) {
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: ptr noalias [[STUFF:%.*]]) {
+; CHECK-NEXT:    [[BEFORE:%.*]] = load i32, ptr [[STUFF]], align 4
+; CHECK-NEXT:    call void @test1f(ptr null)
+; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[BEFORE]], [[BEFORE]]
+; CHECK-NEXT:    store i32 [[SUM]], ptr [[STUFF]], align 4
+; CHECK-NEXT:    ret void
+;
+  %before = load i32, ptr %stuff
+  call void @test1f(ptr null)
+  %after = load i32, ptr %stuff
+  %sum = add i32 %before, %after
+  store i32 %sum, ptr %stuff
+  ret void
 }
diff --git a/llvm/test/Transforms/GVN/phi-translate-partial-alias.ll b/llvm/test/Transforms/GVN/phi-translate-partial-alias.ll
index a102976..358816f 100644
--- a/llvm/test/Transforms/GVN/phi-translate-partial-alias.ll
+++ b/llvm/test/Transforms/GVN/phi-translate-partial-alias.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
@@ -6,12 +7,19 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; not actually redundant around the loop backedge, despite appearances
 ; if phi-translation is ignored.
 
-; CHECK: define void @test0(ptr %begin)
-; CHECK: loop:
-; CHECK:   %l0 = load i8, ptr %phi
-; CHECK:   call void @bar(i8 %l0)
-; CHECK:   %l1 = load i8, ptr %phi
 define void @test0(ptr %begin) {
+; CHECK-LABEL: define void @test0(
+; CHECK-SAME: ptr [[BEGIN:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr [ [[BEGIN]], %[[ENTRY]] ], [ [[NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[L0:%.*]] = load i8, ptr [[PHI]], align 1
+; CHECK-NEXT:    call void @bar(i8 [[L0]])
+; CHECK-NEXT:    [[L1:%.*]] = load i8, ptr [[PHI]], align 1
+; CHECK-NEXT:    [[NEXT]] = getelementptr inbounds i8, ptr [[PHI]], i8 [[L1]]
+; CHECK-NEXT:    br label %[[LOOP]]
+;
 entry:
   br label %loop
 
diff --git a/llvm/test/Transforms/GVN/pr10820.ll b/llvm/test/Transforms/GVN/pr10820.ll
index 48b13a4..4b7be9c 100644
--- a/llvm/test/Transforms/GVN/pr10820.ll
+++ b/llvm/test/Transforms/GVN/pr10820.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn -S | FileCheck %s
 
 target datalayout =
@@ -7,12 +8,16 @@ target triple = "x86_64-unknown-linux-gnu"
 @g = external global i31
 
 define void @main() nounwind uwtable {
+; CHECK-LABEL: define void @main(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    store i32 402662078, ptr @g, align 8
+; CHECK-NEXT:    store i31 402662078, ptr undef, align 1
+; CHECK-NEXT:    unreachable
+;
 entry:
-; CHECK: store i32
   store i32 402662078, ptr @g, align 8
-; CHECK-NOT: load i31
   %0 = load i31, ptr @g, align 8
-; CHECK: store i31
   store i31 %0, ptr undef, align 1
   unreachable
 }
diff --git a/llvm/test/Transforms/GVN/pr12979.ll b/llvm/test/Transforms/GVN/pr12979.ll
index 2f7a463..5ff3aa2 100644
--- a/llvm/test/Transforms/GVN/pr12979.ll
+++ b/llvm/test/Transforms/GVN/pr12979.ll
@@ -1,10 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S < %s | FileCheck %s
 
 define i32 @test1(i32 %x, i32 %y) {
-; CHECK: @test1(i32 %x, i32 %y)
-; CHECK: %add1 = add i32 %x, %y
-; CHECK: %foo = add i32 %add1, %add1
-
+; CHECK-LABEL: define i32 @test1(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[FOO:%.*]] = add i32 [[ADD1]], [[ADD1]]
+; CHECK-NEXT:    ret i32 [[FOO]]
+;
   %add1 = add nsw i32 %x, %y
   %add2 = add     i32 %x, %y
   %foo = add i32 %add1, %add2
@@ -12,10 +15,12 @@ define i32 @test1(i32 %x, i32 %y) {
 }
 
 define i32 @test2(i32 %x, i32 %y) {
-; CHECK: @test2(i32 %x, i32 %y)
-; CHECK: %add1 = add i32 %x, %y
-; CHECK: %foo = add i32 %add1, %add1
-
+; CHECK-LABEL: define i32 @test2(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[FOO:%.*]] = add i32 [[ADD1]], [[ADD1]]
+; CHECK-NEXT:    ret i32 [[FOO]]
+;
   %add1 = add nuw i32 %x, %y
   %add2 = add     i32 %x, %y
   %foo = add i32 %add1, %add2
@@ -23,10 +28,12 @@ define i32 @test2(i32 %x, i32 %y) {
 }
 
 define i32 @test3(i32 %x, i32 %y) {
-; CHECK: @test3(i32 %x, i32 %y)
-; CHECK: %add1 = add i32 %x, %y
-; CHECK: %foo = add i32 %add1, %add1
-
+; CHECK-LABEL: define i32 @test3(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[FOO:%.*]] = add i32 [[ADD1]], [[ADD1]]
+; CHECK-NEXT:    ret i32 [[FOO]]
+;
   %add1 = add nuw nsw i32 %x, %y
   %add2 = add     i32 %x, %y
   %foo = add i32 %add1, %add2
@@ -34,10 +41,12 @@ define i32 @test3(i32 %x, i32 %y) {
 }
 
 define i32 @test4(i32 %x, i32 %y) {
-; CHECK: @test4(i32 %x, i32 %y)
-; CHECK: %add1 = add nsw i32 %x, %y
-; CHECK: %foo = add i32 %add1, %add1
-
+; CHECK-LABEL: define i32 @test4(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[FOO:%.*]] = add i32 [[ADD1]], [[ADD1]]
+; CHECK-NEXT:    ret i32 [[FOO]]
+;
   %add1 = add nsw i32 %x, %y
   %add2 = add nsw i32 %x, %y
   %foo = add i32 %add1, %add2
@@ -45,10 +54,12 @@ define i32 @test4(i32 %x, i32 %y) {
 }
 
 define i32 @test5(i32 %x, i32 %y) {
-; CHECK: @test5(i32 %x, i32 %y)
-; CHECK: %add1 = add i32 %x, %y
-; CHECK: %foo = add i32 %add1, %add1
-
+; CHECK-LABEL: define i32 @test5(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[FOO:%.*]] = add i32 [[ADD1]], [[ADD1]]
+; CHECK-NEXT:    ret i32 [[FOO]]
+;
   %add1 = add nuw i32 %x, %y
   %add2 = add nsw i32 %x, %y
   %foo = add i32 %add1, %add2
@@ -56,10 +67,12 @@ define i32 @test5(i32 %x, i32 %y) {
 }
 
 define i32 @test6(i32 %x, i32 %y) {
-; CHECK: @test6(i32 %x, i32 %y)
-; CHECK: %add1 = add nsw i32 %x, %y
-; CHECK: %foo = add i32 %add1, %add1
-
+; CHECK-LABEL: define i32 @test6(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[FOO:%.*]] = add i32 [[ADD1]], [[ADD1]]
+; CHECK-NEXT:    ret i32 [[FOO]]
+;
   %add1 = add nuw nsw i32 %x, %y
   %add2 = add nsw i32 %x, %y
   %foo = add i32 %add1, %add2
@@ -67,11 +80,12 @@ define i32 @test6(i32 %x, i32 %y) {
 }
 
 define i32 @test7(i32 %x, i32 %y) {
-; CHECK: @test7(i32 %x, i32 %y)
-; CHECK: %add1 = add i32 %x, %y
-; CHECK-NOT: what_is_this
-; CHECK: %foo = add i32 %add1, %add1
-
+; CHECK-LABEL: define i32 @test7(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[FOO:%.*]] = add i32 [[ADD1]], [[ADD1]]
+; CHECK-NEXT:    ret i32 [[FOO]]
+;
   %add1 = add i32 %x, %y, !what_is_this !{}
   %add2 = add i32 %x, %y
   %foo = add i32 %add1, %add2
@@ -81,11 +95,12 @@ define i32 @test7(i32 %x, i32 %y) {
 declare void @mumble(i2, i2)
 
 define void @test8(i2 %x) {
-; CHECK-LABEL: @test8(
-; CHECK:      %[[ashr:.*]] = ashr i2 %x, 1
-; CHECK-NEXT: call void @mumble(i2 %[[ashr]], i2 %[[ashr]])
-; CHECK-NEXT: ret void
-
+; CHECK-LABEL: define void @test8(
+; CHECK-SAME: i2 [[X:%.*]]) {
+; CHECK-NEXT:    [[ASHR0:%.*]] = ashr i2 [[X]], 1
+; CHECK-NEXT:    call void @mumble(i2 [[ASHR0]], i2 [[ASHR0]])
+; CHECK-NEXT:    ret void
+;
   %ashr0 = ashr exact i2 %x, 1
   %ashr1 = ashr i2 %x, 1
   call void @mumble(i2 %ashr0, i2 %ashr1)
diff --git a/llvm/test/Transforms/GVN/pr17732.ll b/llvm/test/Transforms/GVN/pr17732.ll
index c6ebd7a..29c7931c 100644
--- a/llvm/test/Transforms/GVN/pr17732.ll
+++ b/llvm/test/Transforms/GVN/pr17732.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S -o - < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -13,6 +14,12 @@ target triple = "x86_64-unknown-linux-gnu"
 @vector_with_zeroinit = common global %struct.with_vector zeroinitializer, align 4
 
 define i32 @main() {
+; CHECK-LABEL: define i32 @main() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 @array_with_zeroinit, ptr align 4 @main.obj_with_array, i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 @vector_with_zeroinit, ptr align 4 @main.obj_with_vector, i64 12, i1 false)
+; CHECK-NEXT:    ret i32 1
+;
 entry:
   tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 @array_with_zeroinit, ptr align 4 @main.obj_with_array, i64 12, i1 false)
   %0 = load i8, ptr getelementptr inbounds (%struct.with_array, ptr @array_with_zeroinit, i64 0, i32 2), align 4
@@ -23,8 +30,6 @@ entry:
   %conv1 = sext i8 %1 to i32
   %and = and i32 %conv0, %conv1
   ret i32 %and
-; CHECK-LABEL: define i32 @main(
-; CHECK: ret i32 1
 }
 
 declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)
diff --git a/llvm/test/Transforms/GVN/pr17852.ll b/llvm/test/Transforms/GVN/pr17852.ll
index 731cbc6..c464cf2 100644
--- a/llvm/test/Transforms/GVN/pr17852.ll
+++ b/llvm/test/Transforms/GVN/pr17852.ll
@@ -1,7 +1,69 @@
-; RUN: opt < %s -passes=gvn
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=gvn -S -o - < %s | FileCheck %s
+
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 %struct.S0 = type { [2 x i8], [2 x i8], [4 x i8], [2 x i8], i32, i32, i32, i32 }
+
 define void @fn1(ptr byval(%struct.S0) align 8 %p1) {
+; CHECK-LABEL: define void @fn1(
+; CHECK-SAME: ptr byval([[STRUCT_S0:%.*]]) align 8 [[P1:%.*]]) {
+; CHECK-NEXT:    br label %[[FOR_COND:.*]]
+; CHECK:       [[FOR_COND]]:
+; CHECK-NEXT:    br i1 true, label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
+; CHECK:       [[BB1:.*:]]
+; CHECK-NEXT:    [[F2:%.*]] = getelementptr inbounds [[STRUCT_S0]], ptr [[P1]], i64 0, i32 2
+; CHECK-NEXT:    [[F9:%.*]] = getelementptr inbounds [[STRUCT_S0]], ptr [[P1]], i64 0, i32 7
+; CHECK-NEXT:    br label %[[FOR_COND]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    [[F22:%.*]] = getelementptr inbounds [[STRUCT_S0]], ptr [[P1]], i64 0, i32 2
+; CHECK-NEXT:    [[F7:%.*]] = getelementptr inbounds [[STRUCT_S0]], ptr [[P1]], i64 0, i32 5
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[F7]], align 8
+; CHECK-NEXT:    br label %[[IF_END40:.*]]
+; CHECK:       [[IF_ELSE]]:
+; CHECK-NEXT:    br i1 false, label %[[FOR_COND18:.*]], label %[[IF_THEN6:.*]]
+; CHECK:       [[IF_THEN6]]:
+; CHECK-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S0]], ptr [[P1]], i64 0, i32 2
+; CHECK-NEXT:    [[F5:%.*]] = getelementptr inbounds [[STRUCT_S0]], ptr [[P1]], i64 0, i32 3
+; CHECK-NEXT:    br label %[[IF_END36:.*]]
+; CHECK:       [[FOR_COND18]]:
+; CHECK-NEXT:    call void @fn4()
+; CHECK-NEXT:    br i1 true, label %[[IF_END:.*]], label %[[FOR_COND18_IF_END36_CRIT_EDGE:.*]]
+; CHECK:       [[FOR_COND18_IF_END36_CRIT_EDGE]]:
+; CHECK-NEXT:    br label %[[IF_END36]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    [[F321:%.*]] = getelementptr inbounds [[STRUCT_S0]], ptr [[P1]], i64 0, i32 2
+; CHECK-NEXT:    [[F925:%.*]] = getelementptr inbounds [[STRUCT_S0]], ptr [[P1]], i64 0, i32 7
+; CHECK-NEXT:    [[F526:%.*]] = getelementptr inbounds [[STRUCT_S0]], ptr [[P1]], i64 0, i32 3
+; CHECK-NEXT:    [[BF_LOAD27:%.*]] = load i16, ptr [[F526]], align 8
+; CHECK-NEXT:    br label %[[IF_END36]]
+; CHECK:       [[IF_END36]]:
+; CHECK-NEXT:    [[F537:%.*]] = getelementptr inbounds [[STRUCT_S0]], ptr [[P1]], i64 0, i32 3
+; CHECK-NEXT:    [[BF_LOAD38:%.*]] = load i16, ptr [[F537]], align 8
+; CHECK-NEXT:    [[BF_CLEAR39:%.*]] = and i16 [[BF_LOAD38]], -16384
+; CHECK-NEXT:    br label %[[IF_END40]]
+; CHECK:       [[IF_END40]]:
+; CHECK-NEXT:    [[BF_LOAD522:%.*]] = phi i16 [ [[BF_LOAD38]], %[[IF_END36]] ], [ poison, %[[IF_THEN]] ]
+; CHECK-NEXT:    [[F6:%.*]] = getelementptr inbounds [[STRUCT_S0]], ptr [[P1]], i64 0, i32 4
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[F6]], align 4
+; CHECK-NEXT:    call void @fn2(i32 [[TMP18]])
+; CHECK-NEXT:    [[F8:%.*]] = getelementptr inbounds [[STRUCT_S0]], ptr [[P1]], i64 0, i32 6
+; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[F8]], align 4
+; CHECK-NEXT:    [[TOBOOL41:%.*]] = icmp eq i32 [[TMP19]], 0
+; CHECK-NEXT:    br i1 true, label %[[IF_END40_IF_END50_CRIT_EDGE:.*]], label %[[IF_THEN42:.*]]
+; CHECK:       [[IF_END40_IF_END50_CRIT_EDGE]]:
+; CHECK-NEXT:    [[F551_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds [[STRUCT_S0]], ptr [[P1]], i64 0, i32 3
+; CHECK-NEXT:    [[BF_LOAD52_PRE:%.*]] = load i16, ptr [[F551_PHI_TRANS_INSERT]], align 8
+; CHECK-NEXT:    br label %[[IF_END50:.*]]
+; CHECK:       [[IF_THEN42]]:
+; CHECK-NEXT:    [[F547:%.*]] = getelementptr inbounds [[STRUCT_S0]], ptr [[P1]], i64 0, i32 3
+; CHECK-NEXT:    [[BF_LOAD48:%.*]] = load i16, ptr [[F547]], align 8
+; CHECK-NEXT:    br label %[[IF_END50]]
+; CHECK:       [[IF_END50]]:
+; CHECK-NEXT:    [[BF_LOAD52:%.*]] = phi i16 [ [[BF_LOAD52_PRE]], %[[IF_END40_IF_END50_CRIT_EDGE]] ], [ [[BF_LOAD522]], %[[IF_THEN42]] ]
+; CHECK-NEXT:    [[F551:%.*]] = getelementptr inbounds [[STRUCT_S0]], ptr [[P1]], i64 0, i32 3
+; CHECK-NEXT:    [[BF_CLEAR53:%.*]] = and i16 [[BF_LOAD52]], -16384
+; CHECK-NEXT:    ret void
+;
   br label %for.cond
 for.cond:                                         ; preds = %1, %0
   br label %for.end
diff --git a/llvm/test/Transforms/GVN/pr24397.ll b/llvm/test/Transforms/GVN/pr24397.ll
index 8ef9360..a663350 100644
--- a/llvm/test/Transforms/GVN/pr24397.ll
+++ b/llvm/test/Transforms/GVN/pr24397.ll
@@ -1,8 +1,21 @@
-; RUN: opt -passes=gvn -disable-output < %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=gvn -S -o - < %s | FileCheck %s
 
 target triple = "x86_64-unknown-linux-gnu"
 
 define i64 @foo(ptr %arrayidx) {
+; CHECK-LABEL: define i64 @foo(
+; CHECK-SAME: ptr [[ARRAYIDX:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[P:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[CMPNULL:%.*]] = icmp eq ptr [[P]], null
+; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT:    br label %[[BB2:.*]]
+; CHECK:       [[ENTRY2:.*:]]
+; CHECK-NEXT:    br label %[[BB2]]
+; CHECK:       [[BB2]]:
+; CHECK-NEXT:    ret i64 [[TMP0]]
+;
 entry:
   %p = load ptr, ptr %arrayidx, align 8
   %cmpnull = icmp eq ptr %p, null
diff --git a/llvm/test/Transforms/GVN/pr24426.ll b/llvm/test/Transforms/GVN/pr24426.ll
index 2a08857..d296e15a0 100644
--- a/llvm/test/Transforms/GVN/pr24426.ll
+++ b/llvm/test/Transforms/GVN/pr24426.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=memcpyopt,mldst-motion,gvn -S | FileCheck %s
 
 declare void @check(i8)
@@ -5,13 +6,17 @@ declare void @check(i8)
 declare void @write(ptr %res)
 
 define void @test1() {
+; CHECK-LABEL: define void @test1() {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [10 x i8], align 1
+; CHECK-NEXT:    call void @write(ptr [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1
+; CHECK-NEXT:    call void @check(i8 [[TMP2]])
+; CHECK-NEXT:    ret void
+;
   %1 = alloca [10 x i8]
   call void @write(ptr %1)
   %2 = load i8, ptr %1
-
-; CHECK-NOT: undef
   call void @check(i8 %2)
-
   ret void
 }
 
diff --git a/llvm/test/Transforms/GVN/pr25440.ll b/llvm/test/Transforms/GVN/pr25440.ll
index 507111ef..046775e 100644
--- a/llvm/test/Transforms/GVN/pr25440.ll
+++ b/llvm/test/Transforms/GVN/pr25440.ll
@@ -1,4 +1,5 @@
-;RUN: opt -passes=gvn -S < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=gvn -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64"
 target triple = "thumbv7--linux-gnueabi"
@@ -10,19 +11,53 @@ target triple = "thumbv7--linux-gnueabi"
 
 ; Function Attrs: nounwind
 define fastcc void @foo(ptr nocapture readonly %x) {
-;CHECK-LABEL: foo
+; CHECK-LABEL: define fastcc void @foo(
+; CHECK-SAME: ptr readonly captures(none) [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BB0:.*]]
+; CHECK:       [[BB0]]:
+; CHECK-NEXT:    [[X_TR:%.*]] = phi ptr [ [[X]], %[[ENTRY]] ], [ null, %[[LAND_LHS_TRUE:.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[X_TR]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT:    switch i32 [[CONV]], label %[[IF_END_50:.*]] [
+; CHECK-NEXT:      i32 43, label %[[CLEANUP:.*]]
+; CHECK-NEXT:      i32 52, label %[[IF_THEN_5:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[IF_THEN_5]]:
+; CHECK-NEXT:    br i1 undef, label %[[LAND_LHS_TRUE]], label %[[IF_THEN_26:.*]]
+; CHECK:       [[LAND_LHS_TRUE]]:
+; CHECK-NEXT:    br i1 undef, label %[[CLEANUP]], label %[[BB0]]
+; CHECK:       [[IF_THEN_26]]:
+; CHECK-NEXT:    br i1 undef, label %[[COND_END:.*]], label %[[COND_FALSE:.*]]
+; CHECK:       [[COND_FALSE]]:
+; CHECK-NEXT:    [[MODE:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], ptr [[X_TR]], i32 0, i32 1
+; CHECK-NEXT:    [[BF_LOAD:%.*]] = load i16, ptr [[MODE]], align 2
+; CHECK-NEXT:    [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 8
+; CHECK-NEXT:    br label %[[COND_END]]
+; CHECK:       [[COND_END]]:
+; CHECK-NEXT:    br i1 undef, label %[[IF_THEN_44:.*]], label %[[CLEANUP]]
+; CHECK:       [[IF_THEN_44]]:
+; CHECK-NEXT:    unreachable
+; CHECK:       [[IF_END_50]]:
+; CHECK-NEXT:    [[ARRAYIDX52:%.*]] = getelementptr inbounds [0 x i32], ptr @length, i32 0, i32 [[CONV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX52]], align 4
+; CHECK-NEXT:    br i1 undef, label %[[FOR_BODY_57:.*]], label %[[CLEANUP]]
+; CHECK:       [[FOR_BODY_57]]:
+; CHECK-NEXT:    [[I_2157:%.*]] = add nsw i32 [[TMP1]], -1
+; CHECK-NEXT:    unreachable
+; CHECK:       [[CLEANUP]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %bb0
 
 bb0:                                      ; preds = %land.lhs.true, %entry
-;CHECK: bb0:
   %x.tr = phi ptr [ %x, %entry ], [ null, %land.lhs.true ]
   %0 = load i16, ptr %x.tr, align 4
-; CHECK: load i16, ptr
   %conv = zext i16 %0 to i32
   switch i32 %conv, label %if.end.50 [
-    i32 43, label %cleanup
-    i32 52, label %if.then.5
+  i32 43, label %cleanup
+  i32 52, label %if.then.5
   ]
 
 if.then.5:                                        ; preds = %bb0
@@ -36,8 +71,6 @@ if.then.26:                                       ; preds = %if.then.5
   br i1 undef, label %cond.end, label %cond.false
 
 cond.false:                                       ; preds = %if.then.26
-; CHECK: cond.false:
-; CHECK: load i16
   %mode = getelementptr inbounds %struct.a, ptr %x.tr.lcssa163, i32 0, i32 1
   %bf.load = load i16, ptr %mode, align 2
   %bf.shl = shl i16 %bf.load, 8
@@ -50,7 +83,6 @@ if.then.44:                                       ; preds = %cond.end
   unreachable
 
 if.end.50:                                        ; preds = %bb0
-;%CHECK: if.end.50:
   %conv.lcssa = phi i32 [ %conv, %bb0 ]
   %arrayidx52 = getelementptr inbounds [0 x i32], ptr @length, i32 0, i32 %conv.lcssa
   %1 = load i32, ptr %arrayidx52, align 4
@@ -68,7 +100,38 @@ cleanup:                                          ; preds = %if.end.50, %cond.en
 @dfg_text = external global ptr, align 4
 
 define void @dfg_lex() {
-;CHECK-LABEL: dfg_lex
+; CHECK-LABEL: define void @dfg_lex() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[WHILE_BODYTHREAD_PRE_SPLIT:.*]]
+; CHECK:       [[WHILE_BODYTHREAD_PRE_SPLIT]]:
+; CHECK-NEXT:    br i1 undef, label %[[WHILE_BODYTHREAD_PRE_SPLIT_IF_THEN_14_CRIT_EDGE:.*]], label %[[IF_END_15:.*]]
+; CHECK:       [[WHILE_BODYTHREAD_PRE_SPLIT_IF_THEN_14_CRIT_EDGE]]:
+; CHECK-NEXT:    [[V1_PRE:%.*]] = load i32, ptr @dfg_text, align 4
+; CHECK-NEXT:    br label %[[IF_THEN_14:.*]]
+; CHECK:       [[IF_THEN_14]]:
+; CHECK-NEXT:    [[V1:%.*]] = phi i32 [ [[V1_PRE]], %[[WHILE_BODYTHREAD_PRE_SPLIT_IF_THEN_14_CRIT_EDGE]] ], [ [[SUB_PTR_RHS_CAST25:%.*]], %[[WHILE_END:.*]] ]
+; CHECK-NEXT:    br label %[[IF_END_15]]
+; CHECK:       [[IF_END_15]]:
+; CHECK-NEXT:    [[V2:%.*]] = load ptr, ptr @yy_c_buf_p, align 4
+; CHECK-NEXT:    br label %[[WHILE_COND_16:.*]]
+; CHECK:       [[WHILE_COND_16]]:
+; CHECK-NEXT:    br i1 undef, label %[[WHILE_COND_16]], label %[[WHILE_END]]
+; CHECK:       [[WHILE_END]]:
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[V2]], i32 undef
+; CHECK-NEXT:    store ptr [[ADD_PTR]], ptr @dfg_text, align 4
+; CHECK-NEXT:    [[SUB_PTR_RHS_CAST25]] = ptrtoint ptr [[ADD_PTR]] to i32
+; CHECK-NEXT:    [[SUB_PTR_SUB26:%.*]] = sub i32 0, [[SUB_PTR_RHS_CAST25]]
+; CHECK-NEXT:    switch i32 undef, label %[[SW_DEFAULT:.*]] [
+; CHECK-NEXT:      i32 65, label %[[WHILE_BODYTHREAD_PRE_SPLIT]]
+; CHECK-NEXT:      i32 3, label %[[RETURN:.*]]
+; CHECK-NEXT:      i32 57, label %[[WHILE_BODYTHREAD_PRE_SPLIT]]
+; CHECK-NEXT:      i32 60, label %[[IF_THEN_14]]
+; CHECK-NEXT:    ]
+; CHECK:       [[SW_DEFAULT]]:
+; CHECK-NEXT:    unreachable
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %while.bodythread-pre-split
 
@@ -93,10 +156,10 @@ while.end:                                        ; preds = %while.cond.16
   %sub.ptr.rhs.cast25 = ptrtoint ptr %add.ptr to i32
   %sub.ptr.sub26 = sub i32 0, %sub.ptr.rhs.cast25
   switch i32 undef, label %sw.default [
-    i32 65, label %while.bodythread-pre-split
-    i32 3, label %return
-    i32 57, label %while.bodythread-pre-split
-    i32 60, label %if.then.14
+  i32 65, label %while.bodythread-pre-split
+  i32 3, label %return
+  i32 57, label %while.bodythread-pre-split
+  i32 60, label %if.then.14
   ]
 
 sw.default:                                       ; preds = %while.end
diff --git a/llvm/test/Transforms/GVN/pr28562.ll b/llvm/test/Transforms/GVN/pr28562.ll
index 338200a..02301dc 100644
--- a/llvm/test/Transforms/GVN/pr28562.ll
+++ b/llvm/test/Transforms/GVN/pr28562.ll
@@ -1,9 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -S -passes=gvn < %s | FileCheck %s
+
 define ptr @test1(ptr %a) {
+; CHECK-LABEL: define ptr @test1(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    [[X1:%.*]] = getelementptr i32, ptr [[A]], i32 10
+; CHECK-NEXT:    ret ptr [[X1]]
+;
   %x1 = getelementptr inbounds i32, ptr %a, i32 10
   %x2 = getelementptr i32, ptr %a, i32 10
   ret ptr %x2
-; CHECK-LABEL: @test1(
-; CHECK: %[[x:.*]] = getelementptr i32, ptr %a, i32 10
-; CHECK: ret ptr %[[x]]
 }
diff --git a/llvm/test/Transforms/GVN/pr28879.ll b/llvm/test/Transforms/GVN/pr28879.ll
index 0c9231d..b961a55 100644
--- a/llvm/test/Transforms/GVN/pr28879.ll
+++ b/llvm/test/Transforms/GVN/pr28879.ll
@@ -1,12 +1,22 @@
-; RUN: opt -passes=gvn <%s -S -o - | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=gvn < %s -S -o - | FileCheck %s
 
 define void @f() {
+; CHECK-LABEL: define void @f() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[A:%.*]] = alloca <7 x i1>, align 2
+; CHECK-NEXT:    store <7 x i1> undef, ptr [[A]], align 2
+; CHECK-NEXT:    [[VAL:%.*]] = load i1, ptr [[A]], align 2
+; CHECK-NEXT:    br i1 [[VAL]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+; CHECK:       [[COND_TRUE]]:
+; CHECK-NEXT:    ret void
+; CHECK:       [[COND_FALSE]]:
+; CHECK-NEXT:    ret void
+;
 entry:
   %a = alloca <7 x i1>, align 2
   store <7 x i1> undef, ptr %a, align 2
-; CHECK: store <7 x i1> undef, ptr
   %val = load i1, ptr %a, align 2
-; CHECK: load i1, ptr
   br i1 %val, label %cond.true, label %cond.false
 
 cond.true:
@@ -17,11 +27,20 @@ cond.false:
 }
 
 define <7 x i1> @g(ptr %a) {
+; CHECK-LABEL: define <7 x i1> @g(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VEC:%.*]] = load <7 x i1>, ptr [[A]], align 1
+; CHECK-NEXT:    [[VAL:%.*]] = load i1, ptr [[A]], align 2
+; CHECK-NEXT:    br i1 [[VAL]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+; CHECK:       [[COND_TRUE]]:
+; CHECK-NEXT:    ret <7 x i1> [[VEC]]
+; CHECK:       [[COND_FALSE]]:
+; CHECK-NEXT:    ret <7 x i1> zeroinitializer
+;
 entry:
   %vec = load <7 x i1>, ptr %a
-; CHECK: load <7 x i1>, ptr
   %val = load i1, ptr %a, align 2
-; CHECK: load i1, ptr
   br i1 %val, label %cond.true, label %cond.false
 
 cond.true:
diff --git a/llvm/test/Transforms/GVN/pr36063.ll b/llvm/test/Transforms/GVN/pr36063.ll
index 5ac4c3d..8aaeff6 100644
--- a/llvm/test/Transforms/GVN/pr36063.ll
+++ b/llvm/test/Transforms/GVN/pr36063.ll
@@ -1,6 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=memcpyopt,mldst-motion,gvn -S | FileCheck %s
 
 define void @foo(ptr %ret, i1 %x) {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: ptr [[RET:%.*]], i1 [[X:%.*]]) {
+; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    br i1 [[X]], label %[[YES:.*]], label %[[NO:.*]]
+; CHECK:       [[YES]]:
+; CHECK-NEXT:    br label %[[OUT:.*]]
+; CHECK:       [[NO]]:
+; CHECK-NEXT:    br label %[[OUT]]
+; CHECK:       [[OUT]]:
+; CHECK-NEXT:    store i8 5, ptr [[A]], align 1
+; CHECK-NEXT:    store i8 5, ptr [[RET]], align 1
+; CHECK-NEXT:    ret void
+;
   %a = alloca i8
   br i1 %x, label %yes, label %no
 
@@ -14,7 +28,6 @@ no:                                               ; preds = %0
 
 out:                                              ; preds = %no, %yes
   %tmp = load i8, ptr %a
-; CHECK-NOT: undef
   store i8 %tmp, ptr %ret
   ret void
 }
diff --git a/llvm/test/Transforms/GVN/pr42605.ll b/llvm/test/Transforms/GVN/pr42605.ll
index f0ff6d9..3e6241c 100644
--- a/llvm/test/Transforms/GVN/pr42605.ll
+++ b/llvm/test/Transforms/GVN/pr42605.ll
@@ -1,6 +1,9 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn %s -S | FileCheck %s
+
 ; PR42605. Check phi-translate won't translate the value number of a call
 ; to the value of another call with clobber in between.
+
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
@@ -12,6 +15,13 @@ declare dso_local i32 @printf(ptr nocapture readonly, ...) local_unnamed_addr
 
 ; Function Attrs: noinline norecurse nounwind readonly uwtable
 define dso_local i32 @_Z3gooi(i32 %i) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local i32 @_Z3gooi(
+; CHECK-SAME: i32 [[I:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[T0:%.*]] = load i32, ptr @global, align 4, !tbaa [[INT_TBAA2:![0-9]+]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[T0]], [[I]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
 entry:
   %t0 = load i32, ptr @global, align 4, !tbaa !2
   %add = add nsw i32 %t0, %i
@@ -20,6 +30,24 @@ entry:
 
 ; Function Attrs: nofree nounwind uwtable
 define dso_local void @noclobber() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @noclobber() local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @_Z3gooi(i32 2)
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[CALL]], 5
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[ADD]], 2
+; CHECK-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call i32 @_Z3gooi(i32 3)
+; CHECK-NEXT:    [[ADD2:%.*]] = add nsw i32 [[CALL1]], 5
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    [[ADD4_PRE_PHI:%.*]] = phi i32 [ [[ADD2]], %[[IF_THEN]] ], [ [[ADD]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 3, %[[IF_THEN]] ], [ 2, %[[ENTRY]] ]
+; CHECK-NEXT:    [[GLOBAL2_0:%.*]] = phi i32 [ [[ADD2]], %[[IF_THEN]] ], [ [[ADD]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[CALL3:%.*]] = tail call i32 @_Z3gooi(i32 [[I_0]])
+; CHECK-NEXT:    [[CALL5:%.*]] = tail call i32 (ptr, ...) @printf(ptr @.str, i32 [[GLOBAL2_0]], i32 [[ADD4_PRE_PHI]])
+; CHECK-NEXT:    ret void
+;
 entry:
   %call = tail call i32 @_Z3gooi(i32 2)
   %add = add nsw i32 %call, 5
@@ -32,9 +60,6 @@ if.then:                                          ; preds = %entry
   br label %if.end
 
 ; Check pre happens after phitranslate.
-; CHECK-LABEL: @noclobber
-; CHECK: %add4.pre-phi = phi i32 [ %add2, %if.then ], [ %add, %entry ]
-; CHECK: printf(ptr @.str, i32 %global2.0, i32 %add4.pre-phi)
 
 if.end:                                           ; preds = %if.then, %entry
   %i.0 = phi i32 [ 3, %if.then ], [ 2, %entry ]
@@ -47,6 +72,25 @@ if.end:                                           ; preds = %if.then, %entry
 
 ; Function Attrs: nofree nounwind uwtable
 define dso_local void @hasclobber() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @hasclobber() local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @_Z3gooi(i32 2)
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[CALL]], 5
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[ADD]], 2
+; CHECK-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call i32 @_Z3gooi(i32 3)
+; CHECK-NEXT:    [[ADD2:%.*]] = add nsw i32 [[CALL1]], 5
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 3, %[[IF_THEN]] ], [ 2, %[[ENTRY]] ]
+; CHECK-NEXT:    [[GLOBAL2_0:%.*]] = phi i32 [ [[ADD2]], %[[IF_THEN]] ], [ [[ADD]], %[[ENTRY]] ]
+; CHECK-NEXT:    store i32 5, ptr @global, align 4, !tbaa [[INT_TBAA2]]
+; CHECK-NEXT:    [[CALL3:%.*]] = tail call i32 @_Z3gooi(i32 [[I_0]])
+; CHECK-NEXT:    [[ADD4:%.*]] = add nsw i32 [[CALL3]], 5
+; CHECK-NEXT:    [[CALL5:%.*]] = tail call i32 (ptr, ...) @printf(ptr @.str, i32 [[GLOBAL2_0]], i32 [[ADD4]])
+; CHECK-NEXT:    ret void
+;
 entry:
   %call = tail call i32 @_Z3gooi(i32 2)
   %add = add nsw i32 %call, 5
@@ -59,10 +103,6 @@ if.then:                                          ; preds = %entry
   br label %if.end
 
 ; Check no pre happens.
-; CHECK-LABEL: @hasclobber
-; CHECK: %call3 = tail call i32 @_Z3gooi(i32 %i.0)
-; CHECK-NEXT: %add4 = add nsw i32 %call3, 5
-; CHECK-NEXT: printf(ptr @.str, i32 %global2.0, i32 %add4)
 
 if.end:                                           ; preds = %if.then, %entry
   %i.0 = phi i32 [ 3, %if.then ], [ 2, %entry ]
@@ -85,3 +125,9 @@ attributes #0 = { noinline norecurse nounwind readonly uwtable "correctly-rounde
 !3 = !{!"int", !4, i64 0}
 !4 = !{!"omnipotent char", !5, i64 0}
 !5 = !{!"Simple C++ TBAA"}
+;.
+; CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+; CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+; CHECK: [[META5]] = !{!"Simple C++ TBAA"}
+;.
diff --git a/llvm/test/Transforms/GVN/pr49193.ll b/llvm/test/Transforms/GVN/pr49193.ll
index 9ee9f26..52703ee 100644
--- a/llvm/test/Transforms/GVN/pr49193.ll
+++ b/llvm/test/Transforms/GVN/pr49193.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S < %s | FileCheck %s
 
 @a = external local_unnamed_addr global i32, align 4
@@ -6,9 +7,37 @@
 ; Function Attrs: nounwind readnone
 declare ptr @j() local_unnamed_addr #0
 
-; CHECK: define {{.*}}@k()
-
 define i64 @k() local_unnamed_addr {
+; CHECK-LABEL: define i64 @k() local_unnamed_addr {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    br i1 undef, label %[[BB10_PREHEADER:.*]], label %[[BB3:.*]]
+; CHECK:       [[BB10_PREHEADER]]:
+; CHECK-NEXT:    br label %[[BB13:.*]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    [[I4:%.*]] = load i32, ptr @a, align 4
+; CHECK-NEXT:    [[I5_NOT:%.*]] = icmp eq i32 [[I4]], 0
+; CHECK-NEXT:    [[I8:%.*]] = tail call ptr @j()
+; CHECK-NEXT:    br label %[[BB37:.*]]
+; CHECK:       [[BB13]]:
+; CHECK-NEXT:    br i1 undef, label %[[BB30THREAD_PRE_SPLIT:.*]], label %[[BB16:.*]]
+; CHECK:       [[BB16]]:
+; CHECK-NEXT:    [[I17:%.*]] = tail call ptr @j()
+; CHECK-NEXT:    br i1 undef, label %[[BB22THREAD_PRE_SPLIT:.*]], label %[[BB37_LOOPEXIT:.*]]
+; CHECK:       [[BB22THREAD_PRE_SPLIT]]:
+; CHECK-NEXT:    br i1 undef, label %[[BB30THREAD_PRE_SPLIT]], label %[[BB37_LOOPEXIT]]
+; CHECK:       [[BB30THREAD_PRE_SPLIT]]:
+; CHECK-NEXT:    [[I31_PR:%.*]] = load i32, ptr @a, align 4
+; CHECK-NEXT:    [[I32_NOT2:%.*]] = icmp eq i32 [[I31_PR]], 0
+; CHECK-NEXT:    br i1 undef, label %[[BB37_LOOPEXIT]], label %[[BB13]]
+; CHECK:       [[BB37_LOOPEXIT]]:
+; CHECK-NEXT:    [[I38_PRE:%.*]] = load i32, ptr @a, align 4
+; CHECK-NEXT:    br label %[[BB37]]
+; CHECK:       [[BB37]]:
+; CHECK-NEXT:    [[I38:%.*]] = phi i32 [ [[I38_PRE]], %[[BB37_LOOPEXIT]] ], [ [[I4]], %[[BB3]] ]
+; CHECK-NEXT:    store i32 [[I38]], ptr @b, align 4
+; CHECK-NEXT:    [[I39:%.*]] = tail call ptr @j()
+; CHECK-NEXT:    unreachable
+;
 bb:
   br i1 undef, label %bb10.preheader, label %bb3
 
diff --git a/llvm/test/Transforms/GVN/pre-new-inst.ll b/llvm/test/Transforms/GVN/pre-new-inst.ll
index 8e8cea0..0af8ad2 100644
--- a/llvm/test/Transforms/GVN/pre-new-inst.ll
+++ b/llvm/test/Transforms/GVN/pre-new-inst.ll
@@ -1,7 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S %s | FileCheck %s
 
 %MyStruct = type { i32, i32 }
+
 define i8 @foo(i64 %in, ptr %arr, i1 %arg) {
+; CHECK-LABEL: define i8 @foo(
+; CHECK-SAME: i64 [[IN:%.*]], ptr [[ARR:%.*]], i1 [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ADDR:%.*]] = alloca [[MYSTRUCT:%.*]], align 8
+; CHECK-NEXT:    [[DEAD:%.*]] = trunc i64 [[IN]] to i32
+; CHECK-NEXT:    br i1 [[ARG]], label %[[NEXT:.*]], label %[[TMP:.*]]
+; CHECK:       [[TMP]]:
+; CHECK-NEXT:    call void @bar()
+; CHECK-NEXT:    br label %[[NEXT]]
+; CHECK:       [[NEXT]]:
+; CHECK-NEXT:    store i64 [[IN]], ptr [[ADDR]], align 4
+; CHECK-NEXT:    [[RESPTR:%.*]] = getelementptr i8, ptr [[ARR]], i32 [[DEAD]]
+; CHECK-NEXT:    [[RES:%.*]] = load i8, ptr [[RESPTR]], align 1
+; CHECK-NEXT:    ret i8 [[RES]]
+;
   %addr = alloca %MyStruct
   %dead = trunc i64 %in to i32
   br i1 %arg, label %next, label %tmp
@@ -16,11 +32,8 @@ next:
 
 final:
   %idx32 = load i32, ptr %addr
-
-; CHECK: %resptr = getelementptr i8, ptr %arr, i32 %dead
   %resptr = getelementptr i8, ptr %arr, i32 %idx32
   %res = load i8, ptr %resptr
-
   ret i8 %res
 }
 
diff --git a/llvm/test/Transforms/GVN/propagate-ir-flags.ll b/llvm/test/Transforms/GVN/propagate-ir-flags.ll
index 6f4e662..6b11ff5 100644
--- a/llvm/test/Transforms/GVN/propagate-ir-flags.ll
+++ b/llvm/test/Transforms/GVN/propagate-ir-flags.ll
@@ -1,11 +1,15 @@
-
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn -S | FileCheck %s
 
-; CHECK-LABEL: func_fast
-; CHECK:       fadd fast double
-; CHECK-NEXT:  store
-; CHECK-NEXT:  ret
 define double @func_fast(double %a, double %b) {
+; CHECK-LABEL: define double @func_fast(
+; CHECK-SAME: double [[A:%.*]], double [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca double, align 8
+; CHECK-NEXT:    [[ADD:%.*]] = fadd fast double [[B]], 3.000000e+00
+; CHECK-NEXT:    store double [[ADD]], ptr [[A_ADDR]], align 8
+; CHECK-NEXT:    ret double [[ADD]]
+;
 entry:
   %a.addr = alloca double, align 8
   %add = fadd fast double %b, 3.000000e+00
@@ -14,11 +18,15 @@ entry:
   ret double %load_add
 }
 
-; CHECK-LABEL: func_no_fast
-; CHECK:       fadd double
-; CHECK-NEXT:  store
-; CHECK-NEXT:  ret
 define double @func_no_fast(double %a, double %b) {
+; CHECK-LABEL: define double @func_no_fast(
+; CHECK-SAME: double [[A:%.*]], double [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca double, align 8
+; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[B]], 3.000000e+00
+; CHECK-NEXT:    store double [[ADD]], ptr [[A_ADDR]], align 8
+; CHECK-NEXT:    ret double [[ADD]]
+;
 entry:
   %a.addr = alloca double, align 8
   %add = fadd fast double %b, 3.000000e+00
@@ -26,4 +34,3 @@ entry:
   %duplicated_add = fadd double %b, 3.000000e+00
   ret double %duplicated_add
 }
-
diff --git a/llvm/test/Transforms/GVN/rle-no-phi-translate.ll b/llvm/test/Transforms/GVN/rle-no-phi-translate.ll
index 8876665..5b8b4db 100644
--- a/llvm/test/Transforms/GVN/rle-no-phi-translate.ll
+++ b/llvm/test/Transforms/GVN/rle-no-phi-translate.ll
@@ -1,5 +1,7 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn -S | FileCheck %s
 ; XFAIL: *
+
 ; FIXME: This should be promotable, but memdep/gvn don't track values
 ; path/edge sensitively enough.
 
@@ -7,22 +9,30 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 target triple = "i386-apple-darwin7"
 
 define i32 @g(ptr %b, ptr %c) nounwind {
+; CHECK-LABEL: define i32 @g(
+; CHECK-SAME: ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    store i32 1, ptr [[B]], align 4
+; CHECK-NEXT:    store i32 2, ptr [[C]], align 4
+; CHECK-NEXT:    br i1 false, label %[[BB:.*]], label %[[BB2:.*]]
+; CHECK:       [[BB]]:
+; CHECK-NEXT:    br label %[[BB2]]
+; CHECK:       [[BB2]]:
+; CHECK-NEXT:    ret i32 [[CV:%.*]]
+;
 entry:
-        store i32 1, ptr %b
-        store i32 2, ptr %c
-        
-	%t1 = icmp eq ptr %b, null		; <i1> [#uses=1]
-	br i1 %t1, label %bb, label %bb2
+  store i32 1, ptr %b
+  store i32 2, ptr %c
+
+  %t1 = icmp eq ptr %b, null		; <i1> [#uses=1]
+  br i1 %t1, label %bb, label %bb2
 
 bb:		; preds = %entry
-	br label %bb2
+  br label %bb2
 
 bb2:		; preds = %bb1, %bb
-	%c_addr.0 = phi ptr [ %b, %entry ], [ %c, %bb ]		; <ptr> [#uses=1]
-	%cv = load i32, ptr %c_addr.0, align 4		; <i32> [#uses=1]
-	ret i32 %cv
-; CHECK: bb2:
-; CHECK-NOT: load i32
-; CHECK: ret i32 
+  %c_addr.0 = phi ptr [ %b, %entry ], [ %c, %bb ]		; <ptr> [#uses=1]
+  %cv = load i32, ptr %c_addr.0, align 4		; <i32> [#uses=1]
+  ret i32 %cv
 }
 
diff --git a/llvm/test/Transforms/GVN/rle-nonlocal.ll b/llvm/test/Transforms/GVN/rle-nonlocal.ll
index 06aa188..4cadc40 100644
--- a/llvm/test/Transforms/GVN/rle-nonlocal.ll
+++ b/llvm/test/Transforms/GVN/rle-nonlocal.ll
@@ -1,22 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn -S | FileCheck %s
 
 define i32 @main(ptr %p, i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @main(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:  [[BLOCK1:.*:]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]]
+; CHECK:       [[BLOCK2]]:
+; CHECK-NEXT:    [[A:%.*]] = load ptr, ptr [[P]], align 8
+; CHECK-NEXT:    br label %[[BLOCK4:.*]]
+; CHECK:       [[BLOCK3]]:
+; CHECK-NEXT:    [[B:%.*]] = load ptr, ptr [[P]], align 8
+; CHECK-NEXT:    br label %[[BLOCK4]]
+; CHECK:       [[BLOCK4]]:
+; CHECK-NEXT:    [[DEAD:%.*]] = phi ptr [ [[A]], %[[BLOCK2]] ], [ [[B]], %[[BLOCK3]] ]
+; CHECK-NEXT:    [[C:%.*]] = load i32, ptr [[DEAD]], align 4
+; CHECK-NEXT:    [[E:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    ret i32 [[E]]
+;
 block1:
-    %cmp = icmp eq i32 %x, %y
-	br i1 %cmp , label %block2, label %block3
+  %cmp = icmp eq i32 %x, %y
+  br i1 %cmp , label %block2, label %block3
 
 block2:
- %a = load ptr, ptr %p
- br label %block4
+  %a = load ptr, ptr %p
+  br label %block4
 
 block3:
   %b = load ptr, ptr %p
   br label %block4
 
 block4:
-; CHECK-NOT: %existingPHI = phi
-; CHECK: %DEAD = phi
-  %existingPHI = phi ptr [ %a, %block2 ], [ %b, %block3 ] 
+  %existingPHI = phi ptr [ %a, %block2 ], [ %b, %block3 ]
   %DEAD = load ptr, ptr %p
   %c = load i32, ptr %DEAD
   %d = load i32, ptr %existingPHI
diff --git a/llvm/test/Transforms/GVN/simplify-icf-cache-invalidation.ll b/llvm/test/Transforms/GVN/simplify-icf-cache-invalidation.ll
index 8332a98..f4a4155 100644
--- a/llvm/test/Transforms/GVN/simplify-icf-cache-invalidation.ll
+++ b/llvm/test/Transforms/GVN/simplify-icf-cache-invalidation.ll
@@ -1,7 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S < %s | FileCheck %s
 
-; CHECK: define {{.*}}@eggs
-
 %struct.zot = type { ptr }
 %struct.wombat = type { ptr }
 %struct.baz = type { i8, ptr }
@@ -11,6 +10,28 @@
 declare ptr @f()
 
 define hidden void @eggs(ptr %arg, i1 %arg2, ptr %arg3, i32 %arg4, ptr %arg5) unnamed_addr align 2 {
+; CHECK-LABEL: define hidden void @eggs(
+; CHECK-SAME: ptr [[ARG:%.*]], i1 [[ARG2:%.*]], ptr [[ARG3:%.*]], i32 [[ARG4:%.*]], ptr [[ARG5:%.*]]) unnamed_addr align 2 {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_WOMBAT:%.*]], align 8
+; CHECK-NEXT:    store ptr @global, ptr [[ARG]], align 8, !invariant.group [[META0:![0-9]+]]
+; CHECK-NEXT:    br i1 [[ARG2]], label %[[BB4:.*]], label %[[BB2:.*]]
+; CHECK:       [[BB2]]:
+; CHECK-NEXT:    [[TMP3:%.*]] = atomicrmw sub ptr [[ARG3]], i32 [[ARG4]] acq_rel, align 4
+; CHECK-NEXT:    br label %[[BB4]]
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARG5]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_BAZ:%.*]], ptr [[TMP5]], i64 0, i32 1
+; CHECK-NEXT:    br i1 [[ARG2]], label %[[BB9:.*]], label %[[BB7:.*]]
+; CHECK:       [[BB7]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = tail call ptr @f()
+; CHECK-NEXT:    br label %[[BB9]]
+; CHECK:       [[BB9]]:
+; CHECK-NEXT:    tail call void @quux(ptr [[ARG]], i1 [[ARG2]])
+; CHECK-NEXT:    [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 8
+; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq ptr [[TMP17]], null
+; CHECK-NEXT:    ret void
+;
 bb:
   %tmp = alloca %struct.wombat, align 8
   store ptr @global, ptr %arg, align 8, !invariant.group !0
@@ -45,3 +66,6 @@ declare hidden void @quux(ptr, i1) unnamed_addr #0 align 2
 attributes #0 = { nounwind willreturn }
 
 !0 = !{}
+;.
+; CHECK: [[META0]] = !{}
+;.
diff --git a/llvm/test/Transforms/GVN/stale-loop-info.ll b/llvm/test/Transforms/GVN/stale-loop-info.ll
index 3d6ec67..e253aea 100644
--- a/llvm/test/Transforms/GVN/stale-loop-info.ll
+++ b/llvm/test/Transforms/GVN/stale-loop-info.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes='require<loops>,gvn' -S < %s | FileCheck %s
 
 ; This used to fail with ASAN enabled and if for some reason LoopInfo remained
@@ -14,6 +15,27 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 declare void @snork.1(ptr) local_unnamed_addr #0
 
 define hidden zeroext i1 @eggs(ptr %arg, i1 %arg2, i1 %arg3) unnamed_addr align 2 {
+; CHECK-LABEL: define hidden zeroext i1 @eggs(
+; CHECK-SAME: ptr [[ARG:%.*]], i1 [[ARG2:%.*]], i1 [[ARG3:%.*]]) unnamed_addr align 2 {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    br i1 [[ARG2]], label %[[BB14:.*]], label %[[BB3:.*]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds [[STRUCT_WIBBLE_1028:%.*]], ptr [[ARG]], i64 0, i32 2, i32 0, i32 0, i64 0
+; CHECK-NEXT:    br label %[[BB6:.*]]
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    br i1 [[ARG3]], label %[[BB11:.*]], label %[[BB8:.*]]
+; CHECK:       [[BB8]]:
+; CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8
+; CHECK-NEXT:    br label %[[BB12:.*]]
+; CHECK:       [[BB11]]:
+; CHECK-NEXT:    br label %[[BB12]]
+; CHECK:       [[BB12]]:
+; CHECK-NEXT:    [[TMP13:%.*]] = phi ptr [ [[TMP]], %[[BB11]] ], [ [[TMP9]], %[[BB8]] ]
+; CHECK-NEXT:    call void @snork.1(ptr [[TMP13]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    br label %[[BB6]]
+; CHECK:       [[BB14]]:
+; CHECK-NEXT:    ret i1 false
+;
 bb:
   br i1 %arg2, label %bb14, label %bb3
 
@@ -29,7 +51,6 @@ bb7:                                              ; preds = %bb6
 
 bb8:                                              ; preds = %bb7
   %tmp9 = load ptr, ptr %tmp, align 8
-; CHECK: %tmp9 = load ptr, ptr %tmp, align 8
   br label %bb12
 
 bb11:                                             ; preds = %bb7
diff --git a/llvm/test/Transforms/GVN/unreachable-predecessor.ll b/llvm/test/Transforms/GVN/unreachable-predecessor.ll
index 532d554..a584189 100644
--- a/llvm/test/Transforms/GVN/unreachable-predecessor.ll
+++ b/llvm/test/Transforms/GVN/unreachable-predecessor.ll
@@ -1,13 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt < %s -passes=gvn -S | FileCheck %s
 
 ; loop.then is not reachable from loop, so we should be able to deduce that the
 ; store through %phi2 cannot alias %ptr1.
-
-; CHECK-LABEL: @test1
 define void @test1(ptr %ptr1, ptr %ptr2) {
-; CHECK-LABEL: entry:
-; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, ptr %ptr1, i64 1
-; CHECK: %[[VAL1:.*]] = load i32, ptr %[[GEP]]
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: ptr [[PTR1:%.*]], ptr [[PTR2:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[PTR1]], i64 1
+; CHECK-NEXT:    [[VAL1_PRE:%.*]] = load i32, ptr [[GEP1]], align 4
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[PHI1:%.*]] = phi ptr [ [[GEP1]], %[[ENTRY]] ], [ [[PHI2:%.*]], %[[LOOP_THEN:.*]] ]
+; CHECK-NEXT:    br i1 false, label %[[LOOP_LOOP_THEN_CRIT_EDGE:.*]], label %[[LOOP_IF:.*]]
+; CHECK:       [[LOOP_LOOP_THEN_CRIT_EDGE]]:
+; CHECK-NEXT:    br label %[[LOOP_THEN]]
+; CHECK:       [[LOOP_IF]]:
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[GEP1]], i64 1
+; CHECK-NEXT:    [[VAL2:%.*]] = load i32, ptr [[GEP2]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[VAL1_PRE]], [[VAL2]]
+; CHECK-NEXT:    br label %[[LOOP_THEN]]
+; CHECK:       [[LOOP_THEN]]:
+; CHECK-NEXT:    [[PHI2]] = phi ptr [ poison, %[[LOOP_LOOP_THEN_CRIT_EDGE]] ], [ [[GEP2]], %[[LOOP_IF]] ]
+; CHECK-NEXT:    store i32 [[VAL1_PRE]], ptr [[PHI2]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[PTR1]], align 4
+; CHECK-NEXT:    br label %[[LOOP]]
+;
 entry:
   br label %loop.preheader
 
@@ -15,8 +33,6 @@ loop.preheader:
   %gep1 = getelementptr inbounds i32, ptr %ptr1, i64 1
   br label %loop
 
-; CHECK-LABEL: loop:
-; CHECK-NOT: load
 loop:
   %phi1 = phi ptr [ %gep1, %loop.preheader ], [ %phi2, %loop.then ]
   %val1 = load i32, ptr %phi1
@@ -28,8 +44,6 @@ loop.if:
   %cmp = icmp slt i32 %val1, %val2
   br label %loop.then
 
-; CHECK-LABEL: loop.then
-; CHECK: store i32 %[[VAL1]], ptr %phi2
 loop.then:
   %phi2 = phi ptr [ %ptr2, %loop ], [ %gep2, %loop.if ]
   store i32 %val1, ptr %phi2
diff --git a/llvm/test/Transforms/GVN/unreachable_block_infinite_loop.ll b/llvm/test/Transforms/GVN/unreachable_block_infinite_loop.ll
index 5de5e03..2743fd0 100644
--- a/llvm/test/Transforms/GVN/unreachable_block_infinite_loop.ll
+++ b/llvm/test/Transforms/GVN/unreachable_block_infinite_loop.ll
@@ -1,18 +1,40 @@
-; RUN: opt -passes=gvn -disable-output < %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=gvn -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0"
 
 define i32 @test2() nounwind ssp {
+; CHECK-LABEL: define i32 @test2(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i32 0
+; CHECK:       [[UNREACHABLE_BLOCK:.*:]]
+; CHECK-NEXT:    [[A:%.*]] = add i32 [[A]], 1
+; CHECK-NEXT:    ret i32 [[A]]
+;
 entry:
-    ret i32 0
+  ret i32 0
 
 unreachable_block:
-    %a = add i32 %a, 1
-    ret i32 %a
+  %a = add i32 %a, 1
+  ret i32 %a
 }
 
 define i32 @pr23096_test0(i1 %arg, ptr %arg2) {
+; CHECK-LABEL: define i32 @pr23096_test0(
+; CHECK-SAME: i1 [[ARG:%.*]], ptr [[ARG2:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BB0:.*]]
+; CHECK:       [[BB1:.*]]:
+; CHECK-NEXT:    [[PTR1:%.*]] = ptrtoint ptr [[PTR2:%.*]] to i64
+; CHECK-NEXT:    [[PTR2]] = inttoptr i64 [[PTR1]] to ptr
+; CHECK-NEXT:    br i1 [[ARG]], label %[[BB0]], label %[[BB1]]
+; CHECK:       [[BB0]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr [ [[ARG2]], %[[ENTRY]] ], [ [[PTR2]], %[[BB1]] ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[PHI]], align 4
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
 entry:
   br label %bb0
 
@@ -28,6 +50,19 @@ bb0:
 }
 
 define i32 @pr23096_test1(i1 %arg, ptr %arg2) {
+; CHECK-LABEL: define i32 @pr23096_test1(
+; CHECK-SAME: i1 [[ARG:%.*]], ptr [[ARG2:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BB0:.*]]
+; CHECK:       [[BB1:.*]]:
+; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i32 0
+; CHECK-NEXT:    [[PTR2]] = getelementptr i32, ptr [[PTR1]], i32 0
+; CHECK-NEXT:    br i1 [[ARG]], label %[[BB0]], label %[[BB1]]
+; CHECK:       [[BB0]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr [ [[ARG2]], %[[ENTRY]] ], [ [[PTR2]], %[[BB1]] ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[PHI]], align 4
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
 entry:
   br label %bb0
 
diff --git a/llvm/test/Transforms/GVN/volatile-nonvolatile.ll b/llvm/test/Transforms/GVN/volatile-nonvolatile.ll
index 72c6a30..d34c891 100644
--- a/llvm/test/Transforms/GVN/volatile-nonvolatile.ll
+++ b/llvm/test/Transforms/GVN/volatile-nonvolatile.ll
@@ -1,13 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=gvn -S < %s | FileCheck %s
 
 %struct.t = type { ptr }
 
 ; The loaded address and the location of the address itself are not aliased,
 ; so the second reload is not necessary. Check that it can be eliminated.
-; CHECK-LABEL: test1
-; CHECK: load
-; CHECK-NOT: load
 define void @test1(ptr nocapture readonly %p, i32 %v) #0 {
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: ptr readonly captures(none) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[ANYPTR_TBAA0:![0-9]+]]
+; CHECK-NEXT:    store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[INT_TBAA5:![0-9]+]]
+; CHECK-NEXT:    store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[INT_TBAA5]]
+; CHECK-NEXT:    ret void
+;
 entry:
   %0 = load ptr, ptr %p, align 4, !tbaa !1
   store volatile i32 %v, ptr %0, align 4, !tbaa !6
@@ -18,11 +24,16 @@ entry:
 
 ; The store via the loaded address may overwrite the address itself.
 ; Make sure that both loads remain.
-; CHECK-LABEL: test2
-; CHECK: load
-; CHECK: store
-; CHECK: load
 define void @test2(ptr nocapture readonly %p, i32 %v) #0 {
+; CHECK-LABEL: define void @test2(
+; CHECK-SAME: ptr readonly captures(none) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT:    store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[P]], align 4, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT:    store volatile i32 [[V]], ptr [[TMP1]], align 4, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT:    ret void
+;
 entry:
   %0 = load ptr, ptr %p, align 4, !tbaa !1
   store volatile i32 %v, ptr %0, align 4, !tbaa !1
@@ -33,11 +44,16 @@ entry:
 
 ; The loads are ordered and non-monotonic. Although they are not aliased to
 ; the stores, make sure both are preserved.
-; CHECK-LABEL: test3
-; CHECK: load
-; CHECK: store
-; CHECK: load
 define void @test3(ptr nocapture readonly %p, i32 %v) #0 {
+; CHECK-LABEL: define void @test3(
+; CHECK-SAME: ptr readonly captures(none) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load atomic ptr, ptr [[P]] acquire, align 4, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT:    store volatile i32 [[V]], ptr [[TMP0]], align 4, !tbaa [[INT_TBAA5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load atomic ptr, ptr [[P]] acquire, align 4, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT:    store volatile i32 [[V]], ptr [[TMP1]], align 4, !tbaa [[INT_TBAA5]]
+; CHECK-NEXT:    ret void
+;
 entry:
   %0 = load atomic ptr, ptr %p acquire, align 4, !tbaa !1
   store volatile i32 %v, ptr %0, align 4, !tbaa !6
@@ -56,3 +72,12 @@ attributes #0 = { norecurse nounwind }
 !6 = !{!7, !7, i64 0}
 !7 = !{!"int", !4, i64 0}
 
+;.
+; CHECK: [[ANYPTR_TBAA0]] = !{[[META1:![0-9]+]], [[META2:![0-9]+]], i64 0}
+; CHECK: [[META1]] = !{!"", [[META2]], i64 0}
+; CHECK: [[META2]] = !{!"any pointer", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[INT_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[META6]] = !{!"int", [[META3]], i64 0}
+;.
diff --git a/llvm/test/Transforms/GlobalOpt/cleanup-pointer-root-users-gep-constexpr.ll b/llvm/test/Transforms/GlobalOpt/cleanup-pointer-root-users-gep-constexpr.ll
index 26728a7..70c8fe6 100644
--- a/llvm/test/Transforms/GlobalOpt/cleanup-pointer-root-users-gep-constexpr.ll
+++ b/llvm/test/Transforms/GlobalOpt/cleanup-pointer-root-users-gep-constexpr.ll
@@ -44,16 +44,6 @@ entry:
   ret void
 }
 
-define void @stores_ptrtoint_constexpr() {
-; CHECK-LABEL: @stores_ptrtoint_constexpr(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret void
-;
-entry:
-  store i32 0, ptr inttoptr (i64 ptrtoint (ptr @global.20ptr to i64) to ptr), align 8
-  ret void
-}
-
 @gv = internal unnamed_addr global [3 x ptr] zeroinitializer, align 16
 @gv2 = internal unnamed_addr global i32 0, align 4
 
diff --git a/llvm/test/Transforms/HardwareLoops/ARM/structure.ll b/llvm/test/Transforms/HardwareLoops/ARM/structure.ll
index cb66fef..6993fd1 100644
--- a/llvm/test/Transforms/HardwareLoops/ARM/structure.ll
+++ b/llvm/test/Transforms/HardwareLoops/ARM/structure.ll
@@ -321,10 +321,10 @@ for.inc:                                          ; preds = %sw.bb, %sw.bb1, %fo
 ; CHECK-UNROLL-NOT: dls
 ; CHECK-UNROLL:     [[LOOP:.LBB[0-9_]+]]: @ %for.body
 ; CHECK-UNROLL:     le lr, [[LOOP]]
-; CHECK-UNROLL:     wls lr, r12, [[EXIT:.LBB[0-9_]+]]
+; CHECK-UNROLL:     dls lr, r12
 ; CHECK-UNROLL:     [[EPIL:.LBB[0-9_]+]]:
 ; CHECK-UNROLL:     le lr, [[EPIL]]
-; CHECK-UNROLL-NEXT: [[EXIT]]
+; CHECK-UNROLL-NEXT: {{\.LBB[0-9_]+}}: @ %for.cond.cleanup
 
 define void @unroll_inc_int(ptr nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) {
 entry:
@@ -357,10 +357,10 @@ for.body:
 ; CHECK-UNROLL-NOT: dls
 ; CHECK-UNROLL:     [[LOOP:.LBB[0-9_]+]]: @ %for.body
 ; CHECK-UNROLL:     le lr, [[LOOP]]
-; CHECK-UNROLL:     wls lr, r12, [[EPIL_EXIT:.LBB[0-9_]+]]
+; CHECK-UNROLL:     dls lr, r12
 ; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
 ; CHECK-UNROLL:     le lr, [[EPIL]]
-; CHECK-UNROLL: [[EPIL_EXIT]]:
+; CHECK-UNROLL:     {{\.LBB[0-9_]+}}: @ %for.cond.cleanup
 ; CHECK-UNROLL:     pop
 define void @unroll_inc_unsigned(ptr nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) {
 entry:
diff --git a/llvm/test/Transforms/Inline/attributes.ll b/llvm/test/Transforms/Inline/attributes.ll
index 42b1a3a..55ab430 100644
--- a/llvm/test/Transforms/Inline/attributes.ll
+++ b/llvm/test/Transforms/Inline/attributes.ll
@@ -26,6 +26,10 @@ define i32 @sanitize_memtag_callee(i32 %i) sanitize_memtag {
   ret i32 %i
 }
 
+define i32 @sanitize_alloc_token_callee(i32 %i) sanitize_alloc_token {
+  ret i32 %i
+}
+
 define i32 @safestack_callee(i32 %i) safestack {
   ret i32 %i
 }
@@ -58,6 +62,10 @@ define i32 @alwaysinline_sanitize_memtag_callee(i32 %i) alwaysinline sanitize_me
   ret i32 %i
 }
 
+define i32 @alwaysinline_sanitize_alloc_token_callee(i32 %i) alwaysinline sanitize_alloc_token {
+  ret i32 %i
+}
+
 define i32 @alwaysinline_safestack_callee(i32 %i) alwaysinline safestack {
   ret i32 %i
 }
@@ -184,6 +192,39 @@ define i32 @test_sanitize_memtag(i32 %arg) sanitize_memtag {
 ; CHECK-NEXT: ret i32
 }
 
+; ---------------------------------------------------------------------------- ;
+
+; Can inline sanitize_alloc_token functions into a noattr function. The
+; attribute is *not* viral; otherwise it may break code.
+define i32 @test_no_sanitize_alloc_token(i32 %arg) {
+; CHECK-LABEL: @test_no_sanitize_alloc_token(
+; CHECK-SAME: ) {
+; CHECK-NOT: call
+; CHECK: ret i32
+entry:
+  %x1 = call i32 @noattr_callee(i32 %arg)
+  %x2 = call i32 @sanitize_alloc_token_callee(i32 %x1)
+  %x3 = call i32 @alwaysinline_callee(i32 %x2)
+  %x4 = call i32 @alwaysinline_sanitize_alloc_token_callee(i32 %x3)
+  ret i32 %x4
+}
+
+; Can inline noattr functions into a sanitize_alloc_token function. If
+; inlinable noattr functions cannot be instrumented, they should be marked with
+; explicit noinline.
+define i32 @test_sanitize_alloc_token(i32 %arg) sanitize_alloc_token {
+; CHECK-LABEL: @test_sanitize_alloc_token(
+; CHECK-SAME: ) [[SANITIZE_ALLOC_TOKEN:.*]] {
+; CHECK-NOT: call
+; CHECK: ret i32
+entry:
+  %x1 = call i32 @noattr_callee(i32 %arg)
+  %x2 = call i32 @sanitize_alloc_token_callee(i32 %x1)
+  %x3 = call i32 @alwaysinline_callee(i32 %x2)
+  %x4 = call i32 @alwaysinline_sanitize_alloc_token_callee(i32 %x3)
+  ret i32 %x4
+}
+
 define i32 @test_safestack(i32 %arg) safestack {
   %x1 = call i32 @noattr_callee(i32 %arg)
   %x2 = call i32 @safestack_callee(i32 %x1)
@@ -639,6 +680,7 @@ define i32 @loader_replaceable_caller() {
   ret i32 %1
 }
 
+; CHECK: attributes [[SANITIZE_ALLOC_TOKEN]] = { sanitize_alloc_token }
 ; CHECK: attributes [[SLH]] = { speculative_load_hardening }
 ; CHECK: attributes [[FPMAD_FALSE]] = { "less-precise-fpmad"="false" }
 ; CHECK: attributes [[FPMAD_TRUE]] = { "less-precise-fpmad"="true" }
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
index 361a2b8..378ca1f 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll
@@ -269,42 +269,27 @@ define float @fmed3_constant_src2_1_f32(float %x, float %y) #1 {
 }
 
 define float @fmed3_x_qnan0_qnan1_f32(float %x) #1 {
-; IEEE1-LABEL: define float @fmed3_x_qnan0_qnan1_f32(
-; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] {
-; IEEE1-NEXT:    ret float [[X]]
-;
-; IEEE0-LABEL: define float @fmed3_x_qnan0_qnan1_f32(
-; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
-; IEEE0-NEXT:    [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000)
-; IEEE0-NEXT:    ret float [[MED3]]
+; CHECK-LABEL: define float @fmed3_x_qnan0_qnan1_f32(
+; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret float [[X]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000)
   ret float %med3
 }
 
 define float @fmed3_qnan0_x_qnan1_f32(float %x) #1 {
-; IEEE1-LABEL: define float @fmed3_qnan0_x_qnan1_f32(
-; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] {
-; IEEE1-NEXT:    ret float [[X]]
-;
-; IEEE0-LABEL: define float @fmed3_qnan0_x_qnan1_f32(
-; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
-; IEEE0-NEXT:    [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000)
-; IEEE0-NEXT:    ret float [[MED3]]
+; CHECK-LABEL: define float @fmed3_qnan0_x_qnan1_f32(
+; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret float [[X]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000)
   ret float %med3
 }
 
 define float @fmed3_qnan0_qnan1_x_f32(float %x) #1 {
-; IEEE1-LABEL: define float @fmed3_qnan0_qnan1_x_f32(
-; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] {
-; IEEE1-NEXT:    ret float [[X]]
-;
-; IEEE0-LABEL: define float @fmed3_qnan0_qnan1_x_f32(
-; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
-; IEEE0-NEXT:    [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000)
-; IEEE0-NEXT:    ret float [[MED3]]
+; CHECK-LABEL: define float @fmed3_qnan0_qnan1_x_f32(
+; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret float [[X]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x)
   ret float %med3
@@ -448,8 +433,7 @@ define float @fmed3_snan1_x_snan2_f32(float %x) #1 {
 ;
 ; IEEE0-LABEL: define float @fmed3_snan1_x_snan2_f32(
 ; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
-; IEEE0-NEXT:    [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF0000040000000)
-; IEEE0-NEXT:    ret float [[MED3]]
+; IEEE0-NEXT:    ret float [[X]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF0000020000000, float %x, float 0x7FF0000040000000)
   ret float %med3
@@ -462,8 +446,7 @@ define float @fmed3_x_snan1_snan2_f32(float %x) #1 {
 ;
 ; IEEE0-LABEL: define float @fmed3_x_snan1_snan2_f32(
 ; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
-; IEEE0-NEXT:    [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF0000040000000)
-; IEEE0-NEXT:    ret float [[MED3]]
+; IEEE0-NEXT:    ret float [[X]]
 ;
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF0000020000000, float 0x7FF0000040000000)
   ret float %med3
diff --git a/llvm/test/Transforms/InstCombine/select-safe-bool-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-bool-transforms.ll
index 9de9150..8b0a5ca 100644
--- a/llvm/test/Transforms/InstCombine/select-safe-bool-transforms.ll
+++ b/llvm/test/Transforms/InstCombine/select-safe-bool-transforms.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
 ; TODO: All of these should be optimized to less than or equal to a single
@@ -7,13 +7,13 @@
 ; --- (A op B) op' A   /   (B op A) op' A ---
 
 ; (A land B) land A
-define i1 @land_land_left1(i1 %A, i1 %B) {
+define i1 @land_land_left1(i1 %A, i1 %B) !prof !0 {
 ; CHECK-LABEL: @land_land_left1(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[A:%.*]], i1 [[B:%.*]], i1 false
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A:%.*]], i1 [[B:%.*]], i1 false, !prof [[PROF1:![0-9]+]]
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
-  %c = select i1 %A, i1 %B, i1 false
-  %res = select i1 %c, i1 %A, i1 false
+  %c = select i1 %A, i1 %B, i1 false, !prof !1
+  %res = select i1 %c, i1 %A, i1 false, !prof !2
   ret i1 %res
 }
 define i1 @land_land_left2(i1 %A, i1 %B) {
@@ -157,13 +157,13 @@ define i1 @lor_band_left2(i1 %A, i1 %B) {
 }
 
 ; (A lor B) lor A
-define i1 @lor_lor_left1(i1 %A, i1 %B) {
+define i1 @lor_lor_left1(i1 %A, i1 %B) !prof !0 {
 ; CHECK-LABEL: @lor_lor_left1(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[B:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[B:%.*]], !prof [[PROF1]]
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
-  %c = select i1 %A, i1 true, i1 %B
-  %res = select i1 %c, i1 true, i1 %A
+  %c = select i1 %A, i1 true, i1 %B, !prof !1
+  %res = select i1 %c, i1 true, i1 %A, !prof !2
   ret i1 %res
 }
 define i1 @lor_lor_left2(i1 %A, i1 %B) {
@@ -506,3 +506,12 @@ define <2 x i1> @PR50500_falseval(<2 x i1> %a, <2 x i1> %b) {
   %r = select <2 x i1> %a, <2 x i1> %b, <2 x i1> %s
   ret <2 x i1> %r
 }
+
+!0 = !{!"function_entry_count", i64 1000}
+!1 = !{!"branch_weights", i32 2, i32 3}
+!2 = !{!"branch_weights", i32 5, i32 7}
+
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}
+;.
diff --git a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll
index 26b5114..3a03f86 100644
--- a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll
+++ b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll
@@ -6,12 +6,12 @@
 ;###############################################################
 ; minnum(X, qnan) -> X
 ; maxnum(X, qnan) -> X
-; TODO: minnum(X, snan) -> qnan (currently we treat SNaN the same as QNaN)
-; TODO: maxnum(X, snan) -> qnan (currently we treat SNaN the same as QNaN)
+; minnum(X, snan) -> qnan
+; maxnum(X, snan) -> qnan
 ; minimum(X, nan) -> qnan
 ; maximum(X, nan) -> qnan
-; TODO: minimumnum(X, nan) -> X
-; TODO: maximumnum(X, nan) -> X
+; minimumnum(X, nan) -> X
+; maximumnum(X, nan) -> X
 
 define void @minmax_qnan_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) {
 ; CHECK-LABEL: @minmax_qnan_f32(
@@ -19,10 +19,8 @@ define void @minmax_qnan_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %mi
 ; CHECK-NEXT:    store float [[X]], ptr [[MAXNUM_RES:%.*]], align 4
 ; CHECK-NEXT:    store float 0x7FFF000000000000, ptr [[MINIMUM_RES:%.*]], align 4
 ; CHECK-NEXT:    store float 0x7FFF000000000000, ptr [[MAXIMUM_RES:%.*]], align 4
-; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FFF000000000000)
-; CHECK-NEXT:    store float [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 4
-; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float 0x7FFF000000000000)
-; CHECK-NEXT:    store float [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 4
+; CHECK-NEXT:    store float [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 4
+; CHECK-NEXT:    store float [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %minnum = call float @llvm.minnum.f32(float %x, float 0x7FFF000000000000)
@@ -42,17 +40,15 @@ define void @minmax_qnan_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %mi
   ret void
 }
 
-; TODO currently snan is treated the same as qnan, but maxnum/minnum should really return qnan for these cases, not X
+; Note that maxnum/minnum return qnan here for snan inputs, unlike maximumnum/minimumnum
 define void @minmax_snan_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) {
 ; CHECK-LABEL: @minmax_snan_f32(
-; CHECK-NEXT:    store float [[X:%.*]], ptr [[MINNUM_RES:%.*]], align 4
-; CHECK-NEXT:    store float [[X]], ptr [[MAXNUM_RES:%.*]], align 4
+; CHECK-NEXT:    store float 0x7FFC000000000000, ptr [[MINNUM_RES:%.*]], align 4
+; CHECK-NEXT:    store float 0x7FFC000000000000, ptr [[MAXNUM_RES:%.*]], align 4
 ; CHECK-NEXT:    store float 0x7FFC000000000000, ptr [[MINIMUM_RES:%.*]], align 4
 ; CHECK-NEXT:    store float 0x7FFC000000000000, ptr [[MAXIMUM_RES:%.*]], align 4
-; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF4000000000000)
-; CHECK-NEXT:    store float [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 4
-; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float 0x7FF4000000000000)
-; CHECK-NEXT:    store float [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 4
+; CHECK-NEXT:    store float [[X:%.*]], ptr [[MINIMUMNUM_RES:%.*]], align 4
+; CHECK-NEXT:    store float [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %minnum = call float @llvm.minnum.f32(float %x, float 0x7FF4000000000000)
@@ -78,10 +74,8 @@ define void @minmax_qnan_nxv2f64_op0(<vscale x 2 x double> %x, ptr %minnum_res,
 ; CHECK-NEXT:    store <vscale x 2 x double> [[X]], ptr [[MAXNUM_RES:%.*]], align 16
 ; CHECK-NEXT:    store <vscale x 2 x double> splat (double 0x7FF8000DEAD00000), ptr [[MINIMUM_RES:%.*]], align 16
 ; CHECK-NEXT:    store <vscale x 2 x double> splat (double 0x7FF8000DEAD00000), ptr [[MAXIMUM_RES:%.*]], align 16
-; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call <vscale x 2 x double> @llvm.minimumnum.nxv2f64(<vscale x 2 x double> splat (double 0x7FF8000DEAD00000), <vscale x 2 x double> [[X]])
-; CHECK-NEXT:    store <vscale x 2 x double> [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 16
-; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call <vscale x 2 x double> @llvm.maximumnum.nxv2f64(<vscale x 2 x double> splat (double 0x7FF8000DEAD00000), <vscale x 2 x double> [[X]])
-; CHECK-NEXT:    store <vscale x 2 x double> [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <vscale x 2 x double> [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <vscale x 2 x double> [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %minnum = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> splat (double 0x7FF8000DEAD00000), <vscale x 2 x double> %x)
@@ -101,17 +95,15 @@ define void @minmax_qnan_nxv2f64_op0(<vscale x 2 x double> %x, ptr %minnum_res,
   ret void
 }
 
-; TODO currently snan is treated the same as qnan, but maxnum/minnum should really return qnan for these cases, not X
+; Note that maxnum/minnum return qnan here for snan inputs, unlike maximumnum/minimumnum
 define void @minmax_snan_nxv2f64_op1(<vscale x 2 x double> %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) {
 ; CHECK-LABEL: @minmax_snan_nxv2f64_op1(
-; CHECK-NEXT:    store <vscale x 2 x double> [[X:%.*]], ptr [[MINNUM_RES:%.*]], align 16
-; CHECK-NEXT:    store <vscale x 2 x double> [[X]], ptr [[MAXNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <vscale x 2 x double> splat (double 0x7FFC00DEAD00DEAD), ptr [[MINNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <vscale x 2 x double> splat (double 0x7FFC00DEAD00DEAD), ptr [[MAXNUM_RES:%.*]], align 16
 ; CHECK-NEXT:    store <vscale x 2 x double> splat (double 0x7FFC00DEAD00DEAD), ptr [[MINIMUM_RES:%.*]], align 16
 ; CHECK-NEXT:    store <vscale x 2 x double> splat (double 0x7FFC00DEAD00DEAD), ptr [[MAXIMUM_RES:%.*]], align 16
-; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call <vscale x 2 x double> @llvm.minimumnum.nxv2f64(<vscale x 2 x double> splat (double 0x7FF400DEAD00DEAD), <vscale x 2 x double> [[X]])
-; CHECK-NEXT:    store <vscale x 2 x double> [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 16
-; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call <vscale x 2 x double> @llvm.maximumnum.nxv2f64(<vscale x 2 x double> splat (double 0x7FF400DEAD00DEAD), <vscale x 2 x double> [[X]])
-; CHECK-NEXT:    store <vscale x 2 x double> [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <vscale x 2 x double> [[X:%.*]], ptr [[MINIMUMNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <vscale x 2 x double> [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %minnum = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> splat (double 0x7FF400DEAD00DEAD), <vscale x 2 x double> %x)
@@ -131,17 +123,18 @@ define void @minmax_snan_nxv2f64_op1(<vscale x 2 x double> %x, ptr %minnum_res,
   ret void
 }
 
-; TODO Currently, we treat SNaN and QNaN the same. However, for maxnum and minnum, we should not optimize this, as we should return <%x0, QNaN> instead of <%x0, %x1>
+; For maxnum and minnum, we cannot optimize this in InstSimplify, as the result should be
+; <QNaN, %x1> (lane 0 of the constant is an SNaN, lane 1 a QNaN) and InstSimplify cannot create the extra instructions required to construct this.
 define void @minmax_mixed_snan_qnan_v2f64(<2 x double> %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) {
 ; CHECK-LABEL: @minmax_mixed_snan_qnan_v2f64(
-; CHECK-NEXT:    store <2 x double> [[X:%.*]], ptr [[MINNUM_RES:%.*]], align 16
-; CHECK-NEXT:    store <2 x double> [[X]], ptr [[MAXNUM_RES:%.*]], align 16
+; CHECK-NEXT:    [[MINNUM:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> <double 0x7FF400DEAD00DEAD, double 0x7FF8000FEED00000>, <2 x double> [[X:%.*]])
+; CHECK-NEXT:    store <2 x double> [[MINNUM]], ptr [[MINNUM_RES:%.*]], align 16
+; CHECK-NEXT:    [[MAXNUM:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> <double 0x7FF400DEAD00DEAD, double 0x7FF8000FEED00000>, <2 x double> [[X]])
+; CHECK-NEXT:    store <2 x double> [[MAXNUM]], ptr [[MAXNUM_RES:%.*]], align 16
 ; CHECK-NEXT:    store <2 x double> <double 0x7FFC00DEAD00DEAD, double 0x7FF8000FEED00000>, ptr [[MINIMUM_RES:%.*]], align 16
 ; CHECK-NEXT:    store <2 x double> <double 0x7FFC00DEAD00DEAD, double 0x7FF8000FEED00000>, ptr [[MAXIMUM_RES:%.*]], align 16
-; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> <double 0x7FF400DEAD00DEAD, double 0x7FF8000FEED00000>, <2 x double> [[X]])
-; CHECK-NEXT:    store <2 x double> [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 16
-; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> <double 0x7FF400DEAD00DEAD, double 0x7FF8000FEED00000>, <2 x double> [[X]])
-; CHECK-NEXT:    store <2 x double> [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <2 x double> [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <2 x double> [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %minnum = call <2 x double> @llvm.minnum.v2f64(<2 x double> <double 0x7FF400DEAD00DEAD, double 0x7FF8000FEED00000>, <2 x double> %x)
@@ -169,10 +162,8 @@ define void @minmax_mixed_qnan_poison_v2f64(<2 x double> %x, ptr %minnum_res, pt
 ; CHECK-NEXT:    store <2 x double> [[X]], ptr [[MAXNUM_RES:%.*]], align 16
 ; CHECK-NEXT:    store <2 x double> <double poison, double 0x7FF8000DEAD00000>, ptr [[MINIMUM_RES:%.*]], align 16
 ; CHECK-NEXT:    store <2 x double> <double poison, double 0x7FF8000DEAD00000>, ptr [[MAXIMUM_RES:%.*]], align 16
-; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> <double poison, double 0x7FF8000DEAD00000>, <2 x double> [[X]])
-; CHECK-NEXT:    store <2 x double> [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 16
-; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> <double poison, double 0x7FF8000DEAD00000>, <2 x double> [[X]])
-; CHECK-NEXT:    store <2 x double> [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <2 x double> [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <2 x double> [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %minnum = call <2 x double> @llvm.minnum.v2f64(<2 x double> <double poison, double 0x7FF8000DEAD00000>, <2 x double> %x)
@@ -201,10 +192,8 @@ define void @minmax_poison_op0_f16(half %x, ptr %minnum_res, ptr %maxnum_res, pt
 ; CHECK-NEXT:    store half [[X]], ptr [[MAXNUM_RES:%.*]], align 2
 ; CHECK-NEXT:    store half [[X]], ptr [[MINIMUM_RES:%.*]], align 2
 ; CHECK-NEXT:    store half [[X]], ptr [[MAXIMUM_RES:%.*]], align 2
-; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call half @llvm.minimumnum.f16(half poison, half [[X]])
-; CHECK-NEXT:    store half [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 2
-; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call half @llvm.maximumnum.f16(half poison, half [[X]])
-; CHECK-NEXT:    store half [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 2
+; CHECK-NEXT:    store half [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 2
+; CHECK-NEXT:    store half [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 2
 ; CHECK-NEXT:    ret void
 ;
   %minnum = call half @llvm.minnum.f16(half poison, half %x)
@@ -230,10 +219,8 @@ define void @minmax_poison_op1_nxv2f64(<vscale x 2 x double> %x, ptr %minnum_res
 ; CHECK-NEXT:    store <vscale x 2 x double> [[X]], ptr [[MAXNUM_RES:%.*]], align 16
 ; CHECK-NEXT:    store <vscale x 2 x double> [[X]], ptr [[MINIMUM_RES:%.*]], align 16
 ; CHECK-NEXT:    store <vscale x 2 x double> [[X]], ptr [[MAXIMUM_RES:%.*]], align 16
-; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call nnan <vscale x 2 x double> @llvm.minimumnum.nxv2f64(<vscale x 2 x double> [[X]], <vscale x 2 x double> poison)
-; CHECK-NEXT:    store <vscale x 2 x double> [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 16
-; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call nnan <vscale x 2 x double> @llvm.maximumnum.nxv2f64(<vscale x 2 x double> [[X]], <vscale x 2 x double> poison)
-; CHECK-NEXT:    store <vscale x 2 x double> [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <vscale x 2 x double> [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <vscale x 2 x double> [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %minnum = call nnan <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x double> poison)
@@ -260,10 +247,10 @@ define void @minmax_poison_op1_nxv2f64(<vscale x 2 x double> %x, ptr %minnum_res
 ; minnum(X, +inf) -> X if nnan (ignoring NaN quieting)
 ; maximum(X, +inf) -> +inf if nnan
 ; minimum(X, +inf) -> X (ignoring NaN quieting)
-; TODO: maximumnum(X, +inf) -> +inf
-; TODO: minimumnum(X, +inf) -> X if nnan (ignoring NaN quieting)
+; maximumnum(X, +inf) -> +inf
+; minimumnum(X, +inf) -> X if nnan (ignoring NaN quieting)
 
-; Can only optimize maxnum and minimum without the nnan flag
+; Can only optimize maxnum, minimum, and maximumnum without the nnan flag
 define void @minmax_pos_inf_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) {
 ; CHECK-LABEL: @minmax_pos_inf_f32(
 ; CHECK-NEXT:    [[MINNUM:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 0x7FF0000000000000)
@@ -274,8 +261,7 @@ define void @minmax_pos_inf_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr
 ; CHECK-NEXT:    store float [[MAXIMUM]], ptr [[MAXIMUM_RES:%.*]], align 4
 ; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF0000000000000)
 ; CHECK-NEXT:    store float [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 4
-; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float 0x7FF0000000000000)
-; CHECK-NEXT:    store float [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 4
+; CHECK-NEXT:    store float 0x7FF0000000000000, ptr [[MAXIMUMNUM_RES:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %minnum = call float @llvm.minnum.f32(float %x, float 0x7FF0000000000000)
@@ -296,17 +282,14 @@ define void @minmax_pos_inf_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr
 }
 
 ; Can optimize all minmax variants if the nnan flag is set
-; TODO maximumnum/minimumnum
 define void @minmax_pos_inf_nnan_v2f32(<2 x float> %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) {
 ; CHECK-LABEL: @minmax_pos_inf_nnan_v2f32(
 ; CHECK-NEXT:    store <2 x float> [[X:%.*]], ptr [[MINNUM_RES:%.*]], align 8
 ; CHECK-NEXT:    store <2 x float> splat (float 0x7FF0000000000000), ptr [[MAXNUM_RES:%.*]], align 8
 ; CHECK-NEXT:    store <2 x float> [[X]], ptr [[MINIMUM_RES:%.*]], align 8
 ; CHECK-NEXT:    store <2 x float> splat (float 0x7FF0000000000000), ptr [[MAXIMUM_RES:%.*]], align 8
-; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call nnan <2 x float> @llvm.minimumnum.v2f32(<2 x float> splat (float 0x7FF0000000000000), <2 x float> [[X]])
-; CHECK-NEXT:    store <2 x float> [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 8
-; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call nnan <2 x float> @llvm.maximumnum.v2f32(<2 x float> splat (float 0x7FF0000000000000), <2 x float> [[X]])
-; CHECK-NEXT:    store <2 x float> [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 8
+; CHECK-NEXT:    store <2 x float> [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 8
+; CHECK-NEXT:    store <2 x float> splat (float 0x7FF0000000000000), ptr [[MAXIMUMNUM_RES:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %minnum = call nnan <2 x float> @llvm.minnum.v2f32(<2 x float> splat (float 0x7FF0000000000000), <2 x float> %x)
@@ -333,10 +316,10 @@ define void @minmax_pos_inf_nnan_v2f32(<2 x float> %x, ptr %minnum_res, ptr %max
 ; maxnum(X, -inf) -> X if nnan
 ; minimum(X, -inf) -> -inf if nnan
 ; maximum(X, -inf) -> X (Ignoring NaN quieting)
-; TODO: minimumnum(X, -inf) -> -inf
-; TODO: maximumnum(X, -inf) -> X if nnan
+; minimumnum(X, -inf) -> -inf
+; maximumnum(X, -inf) -> X if nnan
 
-; Can only optimize minnum and maximum without the nnan flag
+; Can only optimize minnum, maximum, and minimumnum without the nnan flag
 define void @minmax_neg_inf_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) {
 ; CHECK-LABEL: @minmax_neg_inf_f32(
 ; CHECK-NEXT:    store float 0xFFF0000000000000, ptr [[MINNUM_RES:%.*]], align 4
@@ -345,8 +328,7 @@ define void @minmax_neg_inf_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr
 ; CHECK-NEXT:    [[MINIMUM:%.*]] = call float @llvm.minimum.f32(float [[X]], float 0xFFF0000000000000)
 ; CHECK-NEXT:    store float [[MINIMUM]], ptr [[MINIMUM_RES:%.*]], align 4
 ; CHECK-NEXT:    store float [[X]], ptr [[MAXIMUM_RES:%.*]], align 4
-; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0xFFF0000000000000)
-; CHECK-NEXT:    store float [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 4
+; CHECK-NEXT:    store float 0xFFF0000000000000, ptr [[MINIMUMNUM_RES:%.*]], align 4
 ; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float 0xFFF0000000000000)
 ; CHECK-NEXT:    store float [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -369,17 +351,14 @@ define void @minmax_neg_inf_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr
 }
 
 ; Can optimize all minmax variants if the nnan flag is set
-; TODO maximumnum/minimumnum
 define void @minmax_neg_inf_nnan_v2f64(<2 x double> %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) {
 ; CHECK-LABEL: @minmax_neg_inf_nnan_v2f64(
 ; CHECK-NEXT:    store <2 x double> splat (double 0xFFF0000000000000), ptr [[MINNUM_RES:%.*]], align 16
 ; CHECK-NEXT:    store <2 x double> [[X:%.*]], ptr [[MAXNUM_RES:%.*]], align 16
 ; CHECK-NEXT:    store <2 x double> splat (double 0xFFF0000000000000), ptr [[MINIMUM_RES:%.*]], align 16
 ; CHECK-NEXT:    store <2 x double> [[X]], ptr [[MAXIMUM_RES:%.*]], align 16
-; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call nnan <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[X]], <2 x double> splat (double 0xFFF0000000000000))
-; CHECK-NEXT:    store <2 x double> [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 16
-; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call nnan <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[X]], <2 x double> splat (double 0xFFF0000000000000))
-; CHECK-NEXT:    store <2 x double> [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <2 x double> splat (double 0xFFF0000000000000), ptr [[MINIMUMNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <2 x double> [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %minnum = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> splat (double 0xFFF0000000000000))
@@ -406,8 +385,8 @@ define void @minmax_neg_inf_nnan_v2f64(<2 x double> %x, ptr %minnum_res, ptr %ma
 ; minnum(X, +largest) -> X if ninf && nnan
 ; maximum(X, +largest) -> +largest if ninf && nnan
 ; minimum(X, +largest) -> X if ninf (ignoring quieting of sNaNs)
-; TODO: maximumnum(X, +largest) -> +largest if ninf && nnan
-; TODO: minimumnum(X, +largest) -> X if ninf && nnan
+; maximumnum(X, +largest) -> +largest if ninf
+; minimumnum(X, +largest) -> X if ninf && nnan
 
 ; None of these should be optimized away without the nnan/ninf flags
 define void @minmax_largest_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) {
@@ -443,7 +422,7 @@ define void @minmax_largest_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr
   ret void
 }
 
-; We can optimize maxnum and minimum if we know ninf is set
+; We can optimize maxnum, minimum, and maximumnum if we know ninf is set
 define void @minmax_largest_f32_ninf(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) {
 ; CHECK-LABEL: @minmax_largest_f32_ninf(
 ; CHECK-NEXT:    [[MINNUM:%.*]] = call ninf float @llvm.minnum.f32(float [[X:%.*]], float 0x47EFFFFFE0000000)
@@ -454,8 +433,7 @@ define void @minmax_largest_f32_ninf(float %x, ptr %minnum_res, ptr %maxnum_res,
 ; CHECK-NEXT:    store float [[MAXIMUM]], ptr [[MAXIMUM_RES:%.*]], align 4
 ; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call ninf float @llvm.minimumnum.f32(float [[X]], float 0x47EFFFFFE0000000)
 ; CHECK-NEXT:    store float [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 4
-; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call ninf float @llvm.maximumnum.f32(float [[X]], float 0x47EFFFFFE0000000)
-; CHECK-NEXT:    store float [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 4
+; CHECK-NEXT:    store float 0x47EFFFFFE0000000, ptr [[MAXIMUMNUM_RES:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %minnum = call ninf float @llvm.minnum.f32(float %x, float 0x47EFFFFFE0000000)
@@ -476,17 +454,14 @@ define void @minmax_largest_f32_ninf(float %x, ptr %minnum_res, ptr %maxnum_res,
 }
 
 ; All can be optimized if both the ninf and nnan flags are set (ignoring SNaN propagation in minnum/maxnum)
-; TODO maximumnum/minimumnum
 define void @minmax_largest_v2f32_ninf_nnan(<2 x float> %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) {
 ; CHECK-LABEL: @minmax_largest_v2f32_ninf_nnan(
 ; CHECK-NEXT:    store <2 x float> [[X:%.*]], ptr [[MINNUM_RES:%.*]], align 8
 ; CHECK-NEXT:    store <2 x float> splat (float 0x47EFFFFFE0000000), ptr [[MAXNUM_RES:%.*]], align 8
 ; CHECK-NEXT:    store <2 x float> [[X]], ptr [[MINIMUM_RES:%.*]], align 8
 ; CHECK-NEXT:    store <2 x float> splat (float 0x47EFFFFFE0000000), ptr [[MAXIMUM_RES:%.*]], align 8
-; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call nnan ninf <2 x float> @llvm.minimumnum.v2f32(<2 x float> [[X]], <2 x float> splat (float 0x47EFFFFFE0000000))
-; CHECK-NEXT:    store <2 x float> [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 8
-; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call nnan ninf <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[X]], <2 x float> splat (float 0x47EFFFFFE0000000))
-; CHECK-NEXT:    store <2 x float> [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 8
+; CHECK-NEXT:    store <2 x float> [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 8
+; CHECK-NEXT:    store <2 x float> splat (float 0x47EFFFFFE0000000), ptr [[MAXIMUMNUM_RES:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %minnum = call ninf nnan <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> splat (float 0x47EFFFFFE0000000))
@@ -513,8 +488,8 @@ define void @minmax_largest_v2f32_ninf_nnan(<2 x float> %x, ptr %minnum_res, ptr
 ; minnum(X, -largest) -> -largest if ninf (ignoring SNaN -> QNaN propagation)
 ; maximum(X, -largest) -> X if ninf (ignoring quieting of sNaNs)
 ; minimum(X, -largest) -> -largest if ninf && nnan
-; TODO: maximumnum(X, -largest) -> X if ninf && nnan
-; TODO: minimumnum(X, -largest) -> -largest if ninf
+; maximumnum(X, -largest) -> X if ninf && nnan
+; minimumnum(X, -largest) -> -largest if ninf
 
 ; None of these should be optimized away without the nnan/ninf flags
 define void @minmax_neg_largest_f32(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) {
@@ -550,7 +525,7 @@ define void @minmax_neg_largest_f32(float %x, ptr %minnum_res, ptr %maxnum_res,
   ret void
 }
 
-; We can optimize minnum and maximum if we know ninf is set
+; We can optimize minnum, maximum, and minimumnum if we know ninf is set
 define void @minmax_neg_largest_f32_ninf(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) {
 ; CHECK-LABEL: @minmax_neg_largest_f32_ninf(
 ; CHECK-NEXT:    store float 0xC7EFFFFFE0000000, ptr [[MINNUM_RES:%.*]], align 4
@@ -559,8 +534,7 @@ define void @minmax_neg_largest_f32_ninf(float %x, ptr %minnum_res, ptr %maxnum_
 ; CHECK-NEXT:    [[MINIMUM:%.*]] = call ninf float @llvm.minimum.f32(float [[X]], float 0xC7EFFFFFE0000000)
 ; CHECK-NEXT:    store float [[MINIMUM]], ptr [[MINIMUM_RES:%.*]], align 4
 ; CHECK-NEXT:    store float [[X]], ptr [[MAXIMUM_RES:%.*]], align 4
-; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call ninf float @llvm.minimumnum.f32(float [[X]], float 0xC7EFFFFFE0000000)
-; CHECK-NEXT:    store float [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 4
+; CHECK-NEXT:    store float 0xC7EFFFFFE0000000, ptr [[MINIMUMNUM_RES:%.*]], align 4
 ; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call ninf float @llvm.maximumnum.f32(float [[X]], float 0xC7EFFFFFE0000000)
 ; CHECK-NEXT:    store float [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -583,17 +557,14 @@ define void @minmax_neg_largest_f32_ninf(float %x, ptr %minnum_res, ptr %maxnum_
 }
 
 ; All can be optimized if both the ninf and nnan flags are set (ignoring SNaN propagation in minnum/maxnum)
-; TODO maximumnum/minimumnum
 define void @minmax_neg_largest_nxv2f32_nnan_ninf(<vscale x 2 x float> %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) {
 ; CHECK-LABEL: @minmax_neg_largest_nxv2f32_nnan_ninf(
 ; CHECK-NEXT:    store <vscale x 2 x float> splat (float 0xC7EFFFFFE0000000), ptr [[MINNUM_RES:%.*]], align 8
 ; CHECK-NEXT:    store <vscale x 2 x float> [[X:%.*]], ptr [[MAXNUM_RES:%.*]], align 8
 ; CHECK-NEXT:    store <vscale x 2 x float> splat (float 0xC7EFFFFFE0000000), ptr [[MINIMUM_RES:%.*]], align 8
 ; CHECK-NEXT:    store <vscale x 2 x float> [[X]], ptr [[MAXIMUM_RES:%.*]], align 8
-; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call nnan ninf <vscale x 2 x float> @llvm.minimumnum.nxv2f32(<vscale x 2 x float> [[X]], <vscale x 2 x float> splat (float 0xC7EFFFFFE0000000))
-; CHECK-NEXT:    store <vscale x 2 x float> [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 8
-; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call nnan ninf <vscale x 2 x float> @llvm.maximumnum.nxv2f32(<vscale x 2 x float> [[X]], <vscale x 2 x float> splat (float 0xC7EFFFFFE0000000))
-; CHECK-NEXT:    store <vscale x 2 x float> [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 8
+; CHECK-NEXT:    store <vscale x 2 x float> splat (float 0xC7EFFFFFE0000000), ptr [[MINIMUMNUM_RES:%.*]], align 8
+; CHECK-NEXT:    store <vscale x 2 x float> [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %minnum = call nnan ninf <vscale x 2 x float> @llvm.minnum.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> splat (float 0xC7EFFFFFE0000000))
@@ -614,6 +585,80 @@ define void @minmax_neg_largest_nxv2f32_nnan_ninf(<vscale x 2 x float> %x, ptr %
 }
 
 ;###############################################################
+;#                  Mixed Constant Vector Elements             #
+;###############################################################
+; Tests elementwise handling of different combinations of the above optimizable constants
+
+; Test with vector variants (v2f64) with +Inf and poison
+; Poison element allows for flexibility to choose either X or <poison, +Inf> where applicable
+define void @minmax_mixed_pos_inf_poison_v2f64_nnan(<2 x double> %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) {
+; CHECK-LABEL: @minmax_mixed_pos_inf_poison_v2f64_nnan(
+; CHECK-NEXT:    store <2 x double> [[X:%.*]], ptr [[MINNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <2 x double> <double poison, double 0x7FF0000000000000>, ptr [[MAXNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <2 x double> [[X]], ptr [[MINIMUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <2 x double> <double poison, double 0x7FF0000000000000>, ptr [[MAXIMUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <2 x double> [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <2 x double> <double poison, double 0x7FF0000000000000>, ptr [[MAXIMUMNUM_RES:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %minnum = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> <double poison, double 0x7FF0000000000000>, <2 x double> %x)
+  store <2 x double> %minnum, ptr %minnum_res
+  %maxnum = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> <double poison, double 0x7FF0000000000000>, <2 x double> %x)
+  store <2 x double> %maxnum, ptr %maxnum_res
+
+  %minimum = call nnan <2 x double> @llvm.minimum.v2f64(<2 x double> <double poison, double 0x7FF0000000000000>, <2 x double> %x)
+  store <2 x double> %minimum, ptr %minimum_res
+  %maximum = call nnan <2 x double> @llvm.maximum.v2f64(<2 x double> <double poison, double 0x7FF0000000000000>, <2 x double> %x)
+  store <2 x double> %maximum, ptr %maximum_res
+
+  %minimumnum = call nnan <2 x double> @llvm.minimumnum.v2f64(<2 x double> <double poison, double 0x7FF0000000000000>, <2 x double> %x)
+  store <2 x double> %minimumnum, ptr %minimumnum_res
+  %maximumnum = call nnan <2 x double> @llvm.maximumnum.v2f64(<2 x double> <double poison, double 0x7FF0000000000000>, <2 x double> %x)
+  store <2 x double> %maximumnum, ptr %maximumnum_res
+  ret void
+}
+
+; Tests to show that we can optimize different classes of constant (inf/nan/poison) in different vector elements.
+; We can only optimize if the result would be choosing all elements of the input X, or all constant elements though
+; (where poison allows us to choose either).
+;
+; nnan minnum(<poison, +Inf, SNaN>, X) = <???, X1, QNaN> (Cannot mix elements from X and constant vector)
+; nnan maxnum(<poison, +Inf, SNaN>, X) = <poison, +Inf, QNaN>
+; nnan minimum(<poison, +Inf, SNaN>, X) = <???, X1, QNaN> (Cannot mix elements from X and constant vector)
+; nnan maximum(<poison, +Inf, SNaN>, X) = <poison, +Inf, QNaN>
+; nnan minimumnum(<poison, +Inf, SNaN>, X) = <X0, X1, X2> (Poison can be either X or constant value)
+; nnan maximumnum(<poison, +Inf, SNaN>, X) = <???, +Inf, X2>
+define void @minmax_mixed_pos_inf_poison_snan_v3f32(<3 x float> %x, ptr %minnum_res, ptr %maxnum_res, ptr %minimum_res, ptr %maximum_res, ptr %minimumnum_res, ptr %maximumnum_res) {
+; CHECK-LABEL: @minmax_mixed_pos_inf_poison_snan_v3f32(
+; CHECK-NEXT:    [[MINNUM:%.*]] = call nnan <3 x float> @llvm.minnum.v3f32(<3 x float> <float poison, float 0x7FF0000000000000, float 0x7FF4000000000000>, <3 x float> [[X:%.*]])
+; CHECK-NEXT:    store <3 x float> [[MINNUM]], ptr [[MINNUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <3 x float> <float poison, float 0x7FF0000000000000, float 0x7FFC000000000000>, ptr [[MAXNUM_RES:%.*]], align 16
+; CHECK-NEXT:    [[MINIMUM:%.*]] = call nnan <3 x float> @llvm.minimum.v3f32(<3 x float> <float poison, float 0x7FF0000000000000, float 0x7FF4000000000000>, <3 x float> [[X]])
+; CHECK-NEXT:    store <3 x float> [[MINIMUM]], ptr [[MINIMUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <3 x float> <float poison, float 0x7FF0000000000000, float 0x7FFC000000000000>, ptr [[MAXIMUM_RES:%.*]], align 16
+; CHECK-NEXT:    store <3 x float> [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 16
+; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call nnan <3 x float> @llvm.maximumnum.v3f32(<3 x float> <float poison, float 0x7FF0000000000000, float 0x7FF4000000000000>, <3 x float> [[X]])
+; CHECK-NEXT:    store <3 x float> [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %minnum = call nnan <3 x float> @llvm.minnum.v3f32(<3 x float> <float poison, float 0x7FF0000000000000, float 0x7FF4000000000000>, <3 x float> %x)
+  store <3 x float> %minnum, ptr %minnum_res
+  %maxnum = call nnan <3 x float> @llvm.maxnum.v3f32(<3 x float> <float poison, float 0x7FF0000000000000, float 0x7FF4000000000000>, <3 x float> %x)
+  store <3 x float> %maxnum, ptr %maxnum_res
+
+  %minimum = call nnan <3 x float> @llvm.minimum.v3f32(<3 x float> <float poison, float 0x7FF0000000000000, float 0x7FF4000000000000>, <3 x float> %x)
+  store <3 x float> %minimum, ptr %minimum_res
+  %maximum = call nnan <3 x float> @llvm.maximum.v3f32(<3 x float> <float poison, float 0x7FF0000000000000, float 0x7FF4000000000000>, <3 x float> %x)
+  store <3 x float> %maximum, ptr %maximum_res
+
+  %minimumnum = call nnan <3 x float> @llvm.minimumnum.v3f32(<3 x float> <float poison, float 0x7FF0000000000000, float 0x7FF4000000000000>, <3 x float> %x)
+  store <3 x float> %minimumnum, ptr %minimumnum_res
+  %maximumnum = call nnan <3 x float> @llvm.maximumnum.v3f32(<3 x float> <float poison, float 0x7FF0000000000000, float 0x7FF4000000000000>, <3 x float> %x)
+  store <3 x float> %maximumnum, ptr %maximumnum_res
+  ret void
+}
+
+;###############################################################
 ;#                    Min(x, x) / Max(x, x)                    #
 ;###############################################################
 ; min(x, x) -> x and max(x, x) -> x for all variants (ignoring SNaN quieting)
@@ -623,10 +668,8 @@ define void @minmax_same_args(float %x, ptr %minnum_res, ptr %maxnum_res, ptr %m
 ; CHECK-NEXT:    store float [[X]], ptr [[MAXNUM_RES:%.*]], align 4
 ; CHECK-NEXT:    store float [[X]], ptr [[MINIMUM_RES:%.*]], align 4
 ; CHECK-NEXT:    store float [[X]], ptr [[MAXIMUM_RES:%.*]], align 4
-; CHECK-NEXT:    [[MINIMUMNUM:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float [[X]])
-; CHECK-NEXT:    store float [[MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 4
-; CHECK-NEXT:    [[MAXIMUMNUM:%.*]] = call float @llvm.maximumnum.f32(float [[X]], float [[X]])
-; CHECK-NEXT:    store float [[MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 4
+; CHECK-NEXT:    store float [[X]], ptr [[MINIMUMNUM_RES:%.*]], align 4
+; CHECK-NEXT:    store float [[X]], ptr [[MAXIMUMNUM_RES:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %minnum = call float @llvm.minnum.f32(float %x, float %x)
@@ -660,11 +703,9 @@ define void @minmax_x_minmax_xy(<2 x float> %x, <2 x float> %y, ptr %minnum_res,
 ; CHECK-NEXT:    [[MAXIMUM_XY:%.*]] = call <2 x float> @llvm.maximum.v2f32(<2 x float> [[X]], <2 x float> [[Y]])
 ; CHECK-NEXT:    store <2 x float> [[MAXIMUM_XY]], ptr [[MAXIMUM_RES:%.*]], align 8
 ; CHECK-NEXT:    [[MINIMUMNUM_XY:%.*]] = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> [[X]], <2 x float> [[Y]])
-; CHECK-NEXT:    [[MINIMUMNUM_NESTED:%.*]] = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> [[X]], <2 x float> [[MINIMUMNUM_XY]])
-; CHECK-NEXT:    store <2 x float> [[MINIMUMNUM_NESTED]], ptr [[MINIMUMNUM_RES:%.*]], align 8
+; CHECK-NEXT:    store <2 x float> [[MINIMUMNUM_XY]], ptr [[MINIMUMNUM_RES:%.*]], align 8
 ; CHECK-NEXT:    [[MAXIMUMNUM_XY:%.*]] = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[X]], <2 x float> [[Y]])
-; CHECK-NEXT:    [[MAXIMUMNUM_NESTED:%.*]] = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[X]], <2 x float> [[MAXIMUMNUM_XY]])
-; CHECK-NEXT:    store <2 x float> [[MAXIMUMNUM_NESTED]], ptr [[MAXIMUMNUM_RES:%.*]], align 8
+; CHECK-NEXT:    store <2 x float> [[MAXIMUMNUM_XY]], ptr [[MAXIMUMNUM_RES:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %minnum_xy = call <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> %y)
@@ -758,13 +799,9 @@ define void @minmax_minmax_xy_minmax_yx(half %x, half %y, ptr %minnum_res, ptr %
 ; CHECK-NEXT:    [[MAXIMUM_XY:%.*]] = call half @llvm.maximum.f16(half [[X]], half [[Y]])
 ; CHECK-NEXT:    store half [[MAXIMUM_XY]], ptr [[MAXIMUM_RES:%.*]], align 2
 ; CHECK-NEXT:    [[MINIMUMNUM_XY:%.*]] = call half @llvm.minimumnum.f16(half [[X]], half [[Y]])
-; CHECK-NEXT:    [[MINIMUMNUM_YX:%.*]] = call half @llvm.minimumnum.f16(half [[Y]], half [[X]])
-; CHECK-NEXT:    [[FINAL_MINIMUMNUM:%.*]] = call half @llvm.minimumnum.f16(half [[MINIMUMNUM_XY]], half [[MINIMUMNUM_YX]])
-; CHECK-NEXT:    store half [[FINAL_MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 2
+; CHECK-NEXT:    store half [[MINIMUMNUM_XY]], ptr [[MINIMUMNUM_RES:%.*]], align 2
 ; CHECK-NEXT:    [[MAXIMUMNUM_XY:%.*]] = call half @llvm.maximumnum.f16(half [[X]], half [[Y]])
-; CHECK-NEXT:    [[MAXIMUMNUM_YX:%.*]] = call half @llvm.maximumnum.f16(half [[Y]], half [[X]])
-; CHECK-NEXT:    [[FINAL_MAXIMUMNUM:%.*]] = call half @llvm.maximumnum.f16(half [[MAXIMUMNUM_XY]], half [[MAXIMUMNUM_YX]])
-; CHECK-NEXT:    store half [[FINAL_MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 2
+; CHECK-NEXT:    store half [[MAXIMUMNUM_XY]], ptr [[MAXIMUMNUM_RES:%.*]], align 2
 ; CHECK-NEXT:    ret void
 ;
   %minnum_xy = call half @llvm.minnum.f16(half %x, half %y)
@@ -812,13 +849,9 @@ define void @minmax_minmax_xy_maxmin_yx(double %x, double %y, ptr %minnum_res, p
 ; CHECK-NEXT:    [[MAXIMUM_XY:%.*]] = call double @llvm.maximum.f64(double [[Y]], double [[X]])
 ; CHECK-NEXT:    store double [[MAXIMUM_XY]], ptr [[MAXIMUM_RES:%.*]], align 8
 ; CHECK-NEXT:    [[MINIMUMNUM_XY:%.*]] = call double @llvm.minimumnum.f64(double [[Y]], double [[X]])
-; CHECK-NEXT:    [[MAXIMUMNUM_XY:%.*]] = call double @llvm.maximumnum.f64(double [[X]], double [[Y]])
-; CHECK-NEXT:    [[FINAL_MINIMUMNUM:%.*]] = call double @llvm.minimumnum.f64(double [[MINIMUMNUM_XY]], double [[MAXIMUMNUM_XY]])
-; CHECK-NEXT:    store double [[FINAL_MINIMUMNUM]], ptr [[MINIMUMNUM_RES:%.*]], align 8
-; CHECK-NEXT:    [[MAXIMUMNUM_XY1:%.*]] = call double @llvm.maximumnum.f64(double [[Y]], double [[X]])
-; CHECK-NEXT:    [[MINIMUMNUM_YX:%.*]] = call double @llvm.minimumnum.f64(double [[X]], double [[Y]])
-; CHECK-NEXT:    [[FINAL_MAXIMUMNUM:%.*]] = call double @llvm.maximumnum.f64(double [[MAXIMUMNUM_XY1]], double [[MINIMUMNUM_YX]])
-; CHECK-NEXT:    store double [[FINAL_MAXIMUMNUM]], ptr [[MAXIMUMNUM_RES:%.*]], align 8
+; CHECK-NEXT:    store double [[MINIMUMNUM_XY]], ptr [[MINIMUMNUM_RES:%.*]], align 8
+; CHECK-NEXT:    [[MAXIMUMNUM_XY:%.*]] = call double @llvm.maximumnum.f64(double [[Y]], double [[X]])
+; CHECK-NEXT:    store double [[MAXIMUMNUM_XY]], ptr [[MAXIMUMNUM_RES:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %minnum_xy = call double @llvm.minnum.f64(double %x, double %y)
diff --git a/llvm/test/Transforms/InstSimplify/ptrtoint.ll b/llvm/test/Transforms/InstSimplify/ptrtoint.ll
index 7346187..3b0e052 100644
--- a/llvm/test/Transforms/InstSimplify/ptrtoint.ll
+++ b/llvm/test/Transforms/InstSimplify/ptrtoint.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -S -passes=instsimplify < %s | FileCheck %s
 
+target datalayout = "p1:128:128:128"
+
 define i64 @ptrtoint_gep_sub(ptr %ptr, i64 %end.addr) {
 ; CHECK-LABEL: define i64 @ptrtoint_gep_sub(
 ; CHECK-SAME: ptr [[PTR:%.*]], i64 [[END_ADDR:%.*]]) {
@@ -136,3 +138,15 @@ define i128 @ptrtoint_gep_sub_wide_type(ptr %ptr, i128 %end.addr) {
   %end.addr2 = ptrtoint ptr %end to i128
   ret i128 %end.addr2
 }
+
+define ptr addrspace(1) @inttoptr_of_ptrtoint_wide(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define ptr addrspace(1) @inttoptr_of_ptrtoint_wide(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT:    [[INT:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
+; CHECK-NEXT:    [[PTR2:%.*]] = inttoptr i64 [[INT]] to ptr addrspace(1)
+; CHECK-NEXT:    ret ptr addrspace(1) [[PTR2]]
+;
+  %int = ptrtoint ptr addrspace(1) %ptr to i64 ; addrspace(1) pointers are 128 bits per this file's datalayout, so i64 is narrower than the pointer
+  %ptr2 = inttoptr i64 %int to ptr addrspace(1) ; negative test: the assertions above expect both casts to remain rather than folding back to %ptr
+  ret ptr addrspace(1) %ptr2
+}
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll
index 3b69527..2e4fc55 100644
--- a/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll
@@ -15,7 +15,7 @@ define void @small_load_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale) {
 ; APPLE-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; APPLE-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 7
 ; APPLE-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
-; APPLE-NEXT:    br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; APPLE-NEXT:    br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
 ; APPLE:       [[ENTRY_NEW]]:
 ; APPLE-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
 ; APPLE-NEXT:    br label %[[LOOP:.*]]
@@ -72,18 +72,18 @@ define void @small_load_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale) {
 ; APPLE-NEXT:    [[IV_NEXT_7]] = add nuw nsw i64 [[IV_EPIL]], 8
 ; APPLE-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
 ; APPLE-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; APPLE-NEXT:    br i1 [[NITER_NCMP_7]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
-; APPLE:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
-; APPLE-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_7]], %[[LOOP]] ]
-; APPLE-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; APPLE-NEXT:    br i1 [[NITER_NCMP_7]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]]
 ; APPLE:       [[EXIT_UNR_LCSSA]]:
-; APPLE-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; APPLE-NEXT:    [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_7]], %[[LOOP]] ]
 ; APPLE-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; APPLE-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
+; APPLE-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
 ; APPLE:       [[LOOP_EPIL_PREHEADER]]:
+; APPLE-NEXT:    [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; APPLE-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; APPLE-NEXT:    br label %[[LOOP_EPIL:.*]]
 ; APPLE:       [[LOOP_EPIL]]:
-; APPLE-NEXT:    [[IV_EPIL1:%.*]] = phi i64 [ [[IV_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL1:%.*]], %[[LOOP_EPIL]] ]
+; APPLE-NEXT:    [[IV_EPIL1:%.*]] = phi i64 [ [[IV_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL1:%.*]], %[[LOOP_EPIL]] ]
 ; APPLE-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_EPIL]] ]
 ; APPLE-NEXT:    [[SCALED_IV_EPIL1:%.*]] = mul nuw nsw i64 [[IV_EPIL1]], [[SCALE]]
 ; APPLE-NEXT:    [[GEP_SRC_EPIL1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL1]]
@@ -106,7 +106,7 @@ define void @small_load_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale) {
 ; OTHER-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; OTHER-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
 ; OTHER-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
-; OTHER-NEXT:    br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; OTHER-NEXT:    br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
 ; OTHER:       [[ENTRY_NEW]]:
 ; OTHER-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
 ; OTHER-NEXT:    br label %[[LOOP:.*]]
@@ -127,15 +127,15 @@ define void @small_load_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale) {
 ; OTHER-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
 ; OTHER-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; OTHER-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; OTHER-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
-; OTHER:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
-; OTHER-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
-; OTHER-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; OTHER-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]]
 ; OTHER:       [[EXIT_UNR_LCSSA]]:
-; OTHER-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; OTHER-NEXT:    [[IV_UNR1:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
 ; OTHER-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; OTHER-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
+; OTHER-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
 ; OTHER:       [[LOOP_EPIL_PREHEADER]]:
+; OTHER-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR1]], %[[EXIT_UNR_LCSSA]] ]
+; OTHER-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; OTHER-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; OTHER-NEXT:    br label %[[LOOP_EPIL:.*]]
 ; OTHER:       [[LOOP_EPIL]]:
 ; OTHER-NEXT:    [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_UNR]], [[SCALE]]
@@ -172,7 +172,7 @@ define void @load_op_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale, float %k
 ; APPLE-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; APPLE-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
 ; APPLE-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
-; APPLE-NEXT:    br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; APPLE-NEXT:    br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
 ; APPLE:       [[ENTRY_NEW]]:
 ; APPLE-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
 ; APPLE-NEXT:    br label %[[LOOP:.*]]
@@ -195,15 +195,15 @@ define void @load_op_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale, float %k
 ; APPLE-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
 ; APPLE-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; APPLE-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; APPLE-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
-; APPLE:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
-; APPLE-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
-; APPLE-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; APPLE-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]]
 ; APPLE:       [[EXIT_UNR_LCSSA]]:
-; APPLE-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; APPLE-NEXT:    [[IV_UNR1:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
 ; APPLE-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; APPLE-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
+; APPLE-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
 ; APPLE:       [[LOOP_EPIL_PREHEADER]]:
+; APPLE-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR1]], %[[EXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; APPLE-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; APPLE-NEXT:    br label %[[LOOP_EPIL:.*]]
 ; APPLE:       [[LOOP_EPIL]]:
 ; APPLE-NEXT:    [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_UNR]], [[SCALE]]
@@ -222,7 +222,7 @@ define void @load_op_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale, float %k
 ; OTHER-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; OTHER-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
 ; OTHER-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
-; OTHER-NEXT:    br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; OTHER-NEXT:    br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
 ; OTHER:       [[ENTRY_NEW]]:
 ; OTHER-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
 ; OTHER-NEXT:    br label %[[LOOP:.*]]
@@ -245,15 +245,15 @@ define void @load_op_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale, float %k
 ; OTHER-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
 ; OTHER-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; OTHER-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; OTHER-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
-; OTHER:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
-; OTHER-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
-; OTHER-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; OTHER-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]]
 ; OTHER:       [[EXIT_UNR_LCSSA]]:
-; OTHER-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; OTHER-NEXT:    [[IV_UNR1:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
 ; OTHER-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; OTHER-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
+; OTHER-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
 ; OTHER:       [[LOOP_EPIL_PREHEADER]]:
+; OTHER-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR1]], %[[EXIT_UNR_LCSSA]] ]
+; OTHER-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; OTHER-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; OTHER-NEXT:    br label %[[LOOP_EPIL:.*]]
 ; OTHER:       [[LOOP_EPIL]]:
 ; OTHER-NEXT:    [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_UNR]], [[SCALE]]
@@ -375,7 +375,7 @@ define void @early_continue_dep_on_load_large(ptr %p.1, ptr %p.2, i64 %N, i32 %x
 ; APPLE-NEXT:    [[TMP1:%.*]] = add i64 [[N]], -2
 ; APPLE-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP0]], 3
 ; APPLE-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 3
-; APPLE-NEXT:    br i1 [[TMP2]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; APPLE-NEXT:    br i1 [[TMP2]], label %[[LOOP_HEADER_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
 ; APPLE:       [[ENTRY_NEW]]:
 ; APPLE-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
 ; APPLE-NEXT:    br label %[[LOOP_HEADER:.*]]
@@ -439,7 +439,7 @@ define void @early_continue_dep_on_load_large(ptr %p.1, ptr %p.2, i64 %N, i32 %x
 ; APPLE-NEXT:    [[GEP_4_1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_1]], i64 4
 ; APPLE-NEXT:    [[L_2_1:%.*]] = load i8, ptr [[GEP_4_1]], align 4
 ; APPLE-NEXT:    [[C_2_1:%.*]] = icmp ugt i8 [[L_2_1]], 7
-; APPLE-NEXT:    br i1 [[C_2_1]], label %[[MERGE_11:.*]], label %[[ELSE_1:.*]]
+; APPLE-NEXT:    br i1 [[C_2_1]], label %[[MERGE_12:.*]], label %[[ELSE_1:.*]]
 ; APPLE:       [[ELSE_1]]:
 ; APPLE-NEXT:    [[CONV_I_1:%.*]] = zext nneg i8 [[L_2_1]] to i64
 ; APPLE-NEXT:    [[GEP_A_1:%.*]] = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 [[CONV_I_1]]
@@ -449,8 +449,8 @@ define void @early_continue_dep_on_load_large(ptr %p.1, ptr %p.2, i64 %N, i32 %x
 ; APPLE-NEXT:    [[L_4_1:%.*]] = load i32, ptr [[GEP_B_1]], align 4
 ; APPLE-NEXT:    [[GEP_C_1:%.*]] = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 [[IDXPROM_I_1]]
 ; APPLE-NEXT:    [[L_5_1:%.*]] = load i32, ptr [[GEP_C_1]], align 4
-; APPLE-NEXT:    br label %[[MERGE_11]]
-; APPLE:       [[MERGE_11]]:
+; APPLE-NEXT:    br label %[[MERGE_12]]
+; APPLE:       [[MERGE_12]]:
 ; APPLE-NEXT:    [[MERGE_1_1:%.*]] = phi i32 [ 0, %[[THEN_1]] ], [ [[L_4_1]], %[[ELSE_1]] ]
 ; APPLE-NEXT:    [[MERGE_2_1:%.*]] = phi i32 [ 0, %[[THEN_1]] ], [ [[L_5_1]], %[[ELSE_1]] ]
 ; APPLE-NEXT:    [[ADD14_1:%.*]] = add nsw i32 [[MERGE_2_1]], [[X]]
@@ -488,7 +488,7 @@ define void @early_continue_dep_on_load_large(ptr %p.1, ptr %p.2, i64 %N, i32 %x
 ; APPLE-NEXT:    [[GEP_4_2:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_2]], i64 4
 ; APPLE-NEXT:    [[L_2_2:%.*]] = load i8, ptr [[GEP_4_2]], align 4
 ; APPLE-NEXT:    [[C_2_2:%.*]] = icmp ugt i8 [[L_2_2]], 7
-; APPLE-NEXT:    br i1 [[C_2_2]], label %[[MERGE_22:.*]], label %[[ELSE_2:.*]]
+; APPLE-NEXT:    br i1 [[C_2_2]], label %[[MERGE_23:.*]], label %[[ELSE_2:.*]]
 ; APPLE:       [[ELSE_2]]:
 ; APPLE-NEXT:    [[CONV_I_2:%.*]] = zext nneg i8 [[L_2_2]] to i64
 ; APPLE-NEXT:    [[GEP_A_2:%.*]] = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 [[CONV_I_2]]
@@ -498,8 +498,8 @@ define void @early_continue_dep_on_load_large(ptr %p.1, ptr %p.2, i64 %N, i32 %x
 ; APPLE-NEXT:    [[L_4_2:%.*]] = load i32, ptr [[GEP_B_2]], align 4
 ; APPLE-NEXT:    [[GEP_C_2:%.*]] = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 [[IDXPROM_I_2]]
 ; APPLE-NEXT:    [[L_5_2:%.*]] = load i32, ptr [[GEP_C_2]], align 4
-; APPLE-NEXT:    br label %[[MERGE_22]]
-; APPLE:       [[MERGE_22]]:
+; APPLE-NEXT:    br label %[[MERGE_23]]
+; APPLE:       [[MERGE_23]]:
 ; APPLE-NEXT:    [[MERGE_1_2:%.*]] = phi i32 [ 0, %[[THEN_2]] ], [ [[L_4_2]], %[[ELSE_2]] ]
 ; APPLE-NEXT:    [[MERGE_2_2:%.*]] = phi i32 [ 0, %[[THEN_2]] ], [ [[L_5_2]], %[[ELSE_2]] ]
 ; APPLE-NEXT:    [[ADD14_2:%.*]] = add nsw i32 [[MERGE_2_2]], [[X]]
@@ -580,18 +580,18 @@ define void @early_continue_dep_on_load_large(ptr %p.1, ptr %p.2, i64 %N, i32 %x
 ; APPLE-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV_EPIL]], 4
 ; APPLE-NEXT:    [[NITER_NEXT_3]] = add i64 [[NITER]], 4
 ; APPLE-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; APPLE-NEXT:    br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_HEADER]]
-; APPLE:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
-; APPLE-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[LOOP_LATCH_3]] ]
-; APPLE-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; APPLE-NEXT:    br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP_HEADER]]
 ; APPLE:       [[EXIT_UNR_LCSSA]]:
-; APPLE-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; APPLE-NEXT:    [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[LOOP_LATCH_3]] ]
 ; APPLE-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; APPLE-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
+; APPLE-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_EPIL_PREHEADER]], label %[[EXIT:.*]]
 ; APPLE:       [[LOOP_HEADER_EPIL_PREHEADER]]:
+; APPLE-NEXT:    [[IV_EPIL_INIT:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; APPLE-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; APPLE-NEXT:    br label %[[LOOP_HEADER_EPIL:.*]]
 ; APPLE:       [[LOOP_HEADER_EPIL]]:
-; APPLE-NEXT:    [[IV_EPIL1:%.*]] = phi i64 [ [[IV_UNR]], %[[LOOP_HEADER_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL1:%.*]], %[[LOOP_LATCH_EPIL:.*]] ]
+; APPLE-NEXT:    [[IV_EPIL1:%.*]] = phi i64 [ [[IV_EPIL_INIT]], %[[LOOP_HEADER_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL1:%.*]], %[[LOOP_LATCH_EPIL:.*]] ]
 ; APPLE-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_LATCH_EPIL]] ]
 ; APPLE-NEXT:    [[GEP_EPIL1:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV_EPIL1]]
 ; APPLE-NEXT:    [[L_1_EPIL1:%.*]] = load i32, ptr [[GEP_EPIL1]], align 4
@@ -1034,7 +1034,7 @@ define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) {
 ; APPLE-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; APPLE-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 3
 ; APPLE-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 3
-; APPLE-NEXT:    br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; APPLE-NEXT:    br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
 ; APPLE:       [[ENTRY_NEW]]:
 ; APPLE-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
 ; APPLE-NEXT:    br label %[[LOOP:.*]]
@@ -1063,26 +1063,25 @@ define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) {
 ; APPLE-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV_EPIL]], 4
 ; APPLE-NEXT:    [[NITER_NEXT_3]] = add nuw i64 [[NITER]], 4
 ; APPLE-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; APPLE-NEXT:    br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
-; APPLE:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
-; APPLE-NEXT:    [[RES_PH_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
-; APPLE-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[LOOP]] ]
-; APPLE-NEXT:    [[RDX_UNR_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
+; APPLE-NEXT:    br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]]
+; APPLE:       [[EXIT_UNR_LCSSA]]:
+; APPLE-NEXT:    [[RES_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
+; APPLE-NEXT:    [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[LOOP]] ]
+; APPLE-NEXT:    [[RDX_UNR:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
 ; APPLE-NEXT:    [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]]
 ; APPLE-NEXT:    [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
 ; APPLE-NEXT:    [[BIN_RDX3:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX2]]
-; APPLE-NEXT:    br label %[[EXIT_UNR_LCSSA]]
-; APPLE:       [[EXIT_UNR_LCSSA]]:
-; APPLE-NEXT:    [[RES_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[BIN_RDX3]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
-; APPLE-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
-; APPLE-NEXT:    [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[BIN_RDX3]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
 ; APPLE-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; APPLE-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
+; APPLE-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
 ; APPLE:       [[LOOP_EPIL_PREHEADER]]:
+; APPLE-NEXT:    [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    [[RDX_EPIL_INIT:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[BIN_RDX3]], %[[EXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; APPLE-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD2]])
 ; APPLE-NEXT:    br label %[[LOOP_EPIL:.*]]
 ; APPLE:       [[LOOP_EPIL]]:
-; APPLE-NEXT:    [[IV_EPIL1:%.*]] = phi i64 [ [[IV_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
-; APPLE-NEXT:    [[RDX_EPIL:%.*]] = phi i32 [ [[RDX_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
+; APPLE-NEXT:    [[IV_EPIL1:%.*]] = phi i64 [ [[IV_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
+; APPLE-NEXT:    [[RDX_EPIL:%.*]] = phi i32 [ [[RDX_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
 ; APPLE-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_EPIL]] ]
 ; APPLE-NEXT:    [[GEP_A_EPIL1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL1]]
 ; APPLE-NEXT:    [[TMP7:%.*]] = load i32, ptr [[GEP_A_EPIL1]], align 2
@@ -1096,7 +1095,7 @@ define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) {
 ; APPLE-NEXT:    [[RES_PH1:%.*]] = phi i32 [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
 ; APPLE-NEXT:    br label %[[EXIT]]
 ; APPLE:       [[EXIT]]:
-; APPLE-NEXT:    [[RES:%.*]] = phi i32 [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RES_PH1]], %[[EXIT_EPILOG_LCSSA]] ]
+; APPLE-NEXT:    [[RES:%.*]] = phi i32 [ [[BIN_RDX3]], %[[EXIT_UNR_LCSSA]] ], [ [[RES_PH1]], %[[EXIT_EPILOG_LCSSA]] ]
 ; APPLE-NEXT:    ret i32 [[RES]]
 ;
 ; OTHER-LABEL: define i32 @test_add_reduction_runtime(
@@ -1105,7 +1104,7 @@ define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) {
 ; OTHER-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; OTHER-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 3
 ; OTHER-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 3
-; OTHER-NEXT:    br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; OTHER-NEXT:    br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
 ; OTHER:       [[ENTRY_NEW]]:
 ; OTHER-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
 ; OTHER-NEXT:    br label %[[LOOP:.*]]
@@ -1131,23 +1130,22 @@ define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) {
 ; OTHER-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
 ; OTHER-NEXT:    [[NITER_NEXT_3]] = add i64 [[NITER]], 4
 ; OTHER-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; OTHER-NEXT:    br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
-; OTHER:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
-; OTHER-NEXT:    [[RES_PH_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
-; OTHER-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[LOOP]] ]
-; OTHER-NEXT:    [[RDX_UNR_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
-; OTHER-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; OTHER-NEXT:    br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]]
 ; OTHER:       [[EXIT_UNR_LCSSA]]:
-; OTHER-NEXT:    [[RES_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
-; OTHER-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
-; OTHER-NEXT:    [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; OTHER-NEXT:    [[RES_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
+; OTHER-NEXT:    [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[LOOP]] ]
+; OTHER-NEXT:    [[RDX_UNR:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
 ; OTHER-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; OTHER-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
+; OTHER-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
 ; OTHER:       [[LOOP_EPIL_PREHEADER]]:
+; OTHER-NEXT:    [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
+; OTHER-NEXT:    [[RDX_EPIL_INIT:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_UNR]], %[[EXIT_UNR_LCSSA]] ]
+; OTHER-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; OTHER-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD2]])
 ; OTHER-NEXT:    br label %[[LOOP_EPIL:.*]]
 ; OTHER:       [[LOOP_EPIL]]:
-; OTHER-NEXT:    [[IV_EPIL:%.*]] = phi i64 [ [[IV_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
-; OTHER-NEXT:    [[RDX_EPIL:%.*]] = phi i32 [ [[RDX_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
+; OTHER-NEXT:    [[IV_EPIL:%.*]] = phi i64 [ [[IV_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
+; OTHER-NEXT:    [[RDX_EPIL:%.*]] = phi i32 [ [[RDX_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
 ; OTHER-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_EPIL]] ]
 ; OTHER-NEXT:    [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL]]
 ; OTHER-NEXT:    [[TMP6:%.*]] = load i32, ptr [[GEP_A_EPIL]], align 2
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
index b8215d9..66c55f2 100644
--- a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
@@ -69,12 +69,14 @@ define void @runtime_unroll_generic(i32 %arg_0, ptr %arg_1, ptr %arg_2, ptr %arg
 ; CHECK-A55-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
 ; CHECK-A55-NEXT:    [[NITER_NEXT_3]] = add i64 [[NITER]], 4
 ; CHECK-A55-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-A55-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY6]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-A55-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA1:%.*]], label [[FOR_BODY6]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK-A55:       for.end.loopexit.unr-lcssa:
-; CHECK-A55-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[FOR_BODY6_PREHEADER]] ], [ [[INDVARS_IV_NEXT_3]], [[FOR_BODY6]] ]
 ; CHECK-A55-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
-; CHECK-A55-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL:%.*]]
-; CHECK-A55:       for.body6.epil:
+; CHECK-A55-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[FOR_END]], label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
+; CHECK-A55:       for.body6.epil.preheader:
+; CHECK-A55-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[FOR_BODY6_PREHEADER]] ], [ [[INDVARS_IV_NEXT_3]], [[FOR_END_LOOPEXIT_UNR_LCSSA1]] ]
+; CHECK-A55-NEXT:    [[LCMP_MOD5:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-A55-NEXT:    tail call void @llvm.assume(i1 [[LCMP_MOD5]])
 ; CHECK-A55-NEXT:    [[ARRAYIDX10_EPIL:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2]], i64 [[INDVARS_IV_UNR]]
 ; CHECK-A55-NEXT:    [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX10_EPIL]], align 2
 ; CHECK-A55-NEXT:    [[CONV_EPIL:%.*]] = sext i16 [[TMP13]] to i32
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/vector.ll b/llvm/test/Transforms/LoopUnroll/AArch64/vector.ll
index 38d559f..2bafa08 100644
--- a/llvm/test/Transforms/LoopUnroll/AArch64/vector.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/vector.ll
@@ -9,7 +9,7 @@ define void @reverse(ptr %dst, ptr %src, i64 %len) {
 ; APPLE-NEXT:    [[TMP0:%.*]] = add i64 [[LEN]], -1
 ; APPLE-NEXT:    [[XTRAITER:%.*]] = and i64 [[LEN]], 7
 ; APPLE-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
-; APPLE-NEXT:    br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; APPLE-NEXT:    br i1 [[TMP1]], label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
 ; APPLE:       [[ENTRY_NEW]]:
 ; APPLE-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[LEN]], [[XTRAITER]]
 ; APPLE-NEXT:    br label %[[FOR_BODY:.*]]
@@ -66,18 +66,18 @@ define void @reverse(ptr %dst, ptr %src, i64 %len) {
 ; APPLE-NEXT:    [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
 ; APPLE-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
 ; APPLE-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; APPLE-NEXT:    br i1 [[NITER_NCMP_7]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_BODY]]
-; APPLE:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
-; APPLE-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_7]], %[[FOR_BODY]] ]
-; APPLE-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; APPLE-NEXT:    br i1 [[NITER_NCMP_7]], label %[[EXIT_UNR_LCSSA:.*]], label %[[FOR_BODY]]
 ; APPLE:       [[EXIT_UNR_LCSSA]]:
-; APPLE-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; APPLE-NEXT:    [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_7]], %[[FOR_BODY]] ]
 ; APPLE-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; APPLE-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
+; APPLE-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_BODY_EPIL_PREHEADER]], label %[[EXIT:.*]]
 ; APPLE:       [[FOR_BODY_EPIL_PREHEADER]]:
+; APPLE-NEXT:    [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; APPLE-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; APPLE-NEXT:    br label %[[FOR_BODY_EPIL:.*]]
 ; APPLE:       [[FOR_BODY_EPIL]]:
-; APPLE-NEXT:    [[IV_EPIL:%.*]] = phi i64 [ [[IV_UNR]], %[[FOR_BODY_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[FOR_BODY_EPIL]] ]
+; APPLE-NEXT:    [[IV_EPIL:%.*]] = phi i64 [ [[IV_EPIL_INIT]], %[[FOR_BODY_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[FOR_BODY_EPIL]] ]
 ; APPLE-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[FOR_BODY_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[FOR_BODY_EPIL]] ]
 ; APPLE-NEXT:    [[TMP18:%.*]] = sub nsw i64 [[LEN]], [[IV_EPIL]]
 ; APPLE-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds <4 x float>, ptr [[SRC]], i64 [[TMP18]]
@@ -100,7 +100,7 @@ define void @reverse(ptr %dst, ptr %src, i64 %len) {
 ; CORTEXA55-NEXT:    [[TMP0:%.*]] = add i64 [[LEN]], -1
 ; CORTEXA55-NEXT:    [[XTRAITER:%.*]] = and i64 [[LEN]], 3
 ; CORTEXA55-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 3
-; CORTEXA55-NEXT:    br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; CORTEXA55-NEXT:    br i1 [[TMP1]], label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
 ; CORTEXA55:       [[ENTRY_NEW]]:
 ; CORTEXA55-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[LEN]], [[XTRAITER]]
 ; CORTEXA55-NEXT:    br label %[[FOR_BODY:.*]]
@@ -133,15 +133,15 @@ define void @reverse(ptr %dst, ptr %src, i64 %len) {
 ; CORTEXA55-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
 ; CORTEXA55-NEXT:    [[NITER_NEXT_3]] = add i64 [[NITER]], 4
 ; CORTEXA55-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CORTEXA55-NEXT:    br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_BODY]]
-; CORTEXA55:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
-; CORTEXA55-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[FOR_BODY]] ]
-; CORTEXA55-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; CORTEXA55-NEXT:    br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA:.*]], label %[[FOR_BODY]]
 ; CORTEXA55:       [[EXIT_UNR_LCSSA]]:
-; CORTEXA55-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CORTEXA55-NEXT:    [[IV_UNR1:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[FOR_BODY]] ]
 ; CORTEXA55-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CORTEXA55-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
+; CORTEXA55-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_BODY_EPIL_PREHEADER]], label %[[EXIT:.*]]
 ; CORTEXA55:       [[FOR_BODY_EPIL_PREHEADER]]:
+; CORTEXA55-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR1]], %[[EXIT_UNR_LCSSA]] ]
+; CORTEXA55-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CORTEXA55-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CORTEXA55-NEXT:    br label %[[FOR_BODY_EPIL:.*]]
 ; CORTEXA55:       [[FOR_BODY_EPIL]]:
 ; CORTEXA55-NEXT:    [[TMP10:%.*]] = sub nsw i64 [[LEN]], [[IV_UNR]]
diff --git a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll
index 2486b80..adf1e21 100644
--- a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll
+++ b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-runtime.ll
@@ -14,7 +14,7 @@ define amdgpu_kernel void @unroll_when_cascaded_gep(i32 %arg) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[ARG:%.*]], 1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[TMP0]], 7
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[ARG]], 7
-; CHECK-NEXT:    br i1 [[TMP1]], label [[BB2_UNR_LCSSA:%.*]], label [[BB_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[BB1_EPIL_PREHEADER:%.*]], label [[BB_NEW:%.*]]
 ; CHECK:       bb.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[TMP0]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[BB1:%.*]]
@@ -24,18 +24,18 @@ define amdgpu_kernel void @unroll_when_cascaded_gep(i32 %arg) {
 ; CHECK-NEXT:    [[ADD_7]] = add i32 [[PHI]], 8
 ; CHECK-NEXT:    [[NITER_NEXT_7]] = add i32 [[NITER]], 8
 ; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i32 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[BB2_UNR_LCSSA_LOOPEXIT:%.*]], label [[BB1]]
-; CHECK:       bb2.unr-lcssa.loopexit:
-; CHECK-NEXT:    [[PHI_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[BB1]] ]
-; CHECK-NEXT:    br label [[BB2_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[BB2_UNR_LCSSA:%.*]], label [[BB1]]
 ; CHECK:       bb2.unr-lcssa:
-; CHECK-NEXT:    [[PHI_UNR:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[PHI_UNR_PH]], [[BB2_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[PHI_UNR:%.*]] = phi i32 [ [[ADD_7]], [[BB1]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[BB1_EPIL_PREHEADER:%.*]], label [[BB2:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[BB1_EPIL_PREHEADER]], label [[BB2:%.*]]
 ; CHECK:       bb1.epil.preheader:
+; CHECK-NEXT:    [[PHI_EPIL_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[PHI_UNR]], [[BB2_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label [[BB1_EPIL:%.*]]
 ; CHECK:       bb1.epil:
-; CHECK-NEXT:    [[PHI_EPIL:%.*]] = phi i32 [ [[PHI_UNR]], [[BB1_EPIL_PREHEADER]] ], [ [[ADD_EPIL:%.*]], [[BB1_EPIL]] ]
+; CHECK-NEXT:    [[PHI_EPIL:%.*]] = phi i32 [ [[PHI_EPIL_INIT]], [[BB1_EPIL_PREHEADER]] ], [ [[ADD_EPIL:%.*]], [[BB1_EPIL]] ]
 ; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i32 [ 0, [[BB1_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[BB1_EPIL]] ]
 ; CHECK-NEXT:    [[GETELEMENTPTR_EPIL:%.*]] = getelementptr [1024 x i32], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @global, i32 8), i32 0, i32 0
 ; CHECK-NEXT:    [[ADD_EPIL]] = add i32 [[PHI_EPIL]], 1
diff --git a/llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll b/llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll
index d2911a1..7dacbf6 100644
--- a/llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll
+++ b/llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll
@@ -11,22 +11,21 @@ define void @test_three_blocks(ptr nocapture %Output, ptr nocapture readonly %Co
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[MAXJ]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[MAXJ]], 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
-; CHECK-NEXT:    br i1 [[TMP1]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
 ; CHECK:       for.body.preheader.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[MAXJ]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.cond.cleanup.loopexit.unr-lcssa.loopexit:
+; CHECK:       for.cond.cleanup.loopexit.unr-lcssa:
 ; CHECK-NEXT:    [[TEMP_1_LCSSA_PH_PH:%.*]] = phi i32 [ [[TEMP_1_3:%.*]], [[FOR_INC_3:%.*]] ]
 ; CHECK-NEXT:    [[J_010_UNR_PH:%.*]] = phi i32 [ [[INC_3:%.*]], [[FOR_INC_3]] ]
 ; CHECK-NEXT:    [[TEMP_09_UNR_PH:%.*]] = phi i32 [ [[TEMP_1_3]], [[FOR_INC_3]] ]
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]]
-; CHECK:       for.cond.cleanup.loopexit.unr-lcssa:
-; CHECK-NEXT:    [[TEMP_1_LCSSA_PH:%.*]] = phi i32 [ poison, [[FOR_BODY_PREHEADER]] ], [ [[TEMP_1_LCSSA_PH_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]] ]
-; CHECK-NEXT:    [[J_010_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[J_010_UNR_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[TEMP_09_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TEMP_09_UNR_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD1]], label [[FOR_BODY_EPIL_PREHEADER]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
 ; CHECK:       for.body.epil.preheader:
+; CHECK-NEXT:    [[J_010_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[J_010_UNR_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]] ]
+; CHECK-NEXT:    [[TEMP_09_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TEMP_09_UNR_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD]])
 ; CHECK-NEXT:    br label [[FOR_BODY_EPIL:%.*]]
 ; CHECK:       for.body.epil:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[CONDITION:%.*]], i32 [[J_010_UNR]]
@@ -75,7 +74,7 @@ define void @test_three_blocks(ptr nocapture %Output, ptr nocapture readonly %Co
 ; CHECK-NEXT:    [[TEMP_1_LCSSA_PH1:%.*]] = phi i32 [ [[TEMP_1_EPIL]], [[FOR_INC_EPIL]] ], [ [[TEMP_1_EPIL_1]], [[FOR_INC_EPIL_1]] ], [ [[TEMP_1_EPIL_2]], [[FOR_INC_EPIL_2]] ]
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT]]
 ; CHECK:       for.cond.cleanup.loopexit:
-; CHECK-NEXT:    [[TEMP_1_LCSSA:%.*]] = phi i32 [ [[TEMP_1_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[TEMP_1_LCSSA_PH1]], [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    [[TEMP_1_LCSSA:%.*]] = phi i32 [ [[TEMP_1_LCSSA_PH_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[TEMP_1_LCSSA_PH1]], [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ]
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    [[TEMP_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TEMP_1_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
@@ -135,7 +134,7 @@ define void @test_three_blocks(ptr nocapture %Output, ptr nocapture readonly %Co
 ; CHECK-NEXT:    [[INC_3]] = add nuw i32 [[J_010]], 4
 ; CHECK-NEXT:    [[NITER_NEXT_3]] = add i32 [[NITER]], 4
 ; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY]]
 ;
 entry:
   %cmp8 = icmp eq i32 %MaxJ, 0
@@ -354,24 +353,23 @@ define void @test_four_blocks(ptr nocapture %Output, ptr nocapture readonly %Con
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[MAXJ]], -2
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[TMP0]], 3
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 3
-; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]]
 ; CHECK:       for.body.lr.ph.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[TMP0]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.cond.cleanup.loopexit.unr-lcssa.loopexit:
+; CHECK:       for.cond.cleanup.loopexit.unr-lcssa:
 ; CHECK-NEXT:    [[TEMP_1_LCSSA_PH_PH:%.*]] = phi i32 [ [[TEMP_1_3:%.*]], [[FOR_INC_3:%.*]] ]
 ; CHECK-NEXT:    [[I_UNR_PH:%.*]] = phi i32 [ [[I2_3:%.*]], [[FOR_INC_3]] ]
 ; CHECK-NEXT:    [[J_027_UNR_PH:%.*]] = phi i32 [ [[INC_3:%.*]], [[FOR_INC_3]] ]
 ; CHECK-NEXT:    [[TEMP_026_UNR_PH:%.*]] = phi i32 [ [[TEMP_1_3]], [[FOR_INC_3]] ]
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]]
-; CHECK:       for.cond.cleanup.loopexit.unr-lcssa:
-; CHECK-NEXT:    [[TEMP_1_LCSSA_PH:%.*]] = phi i32 [ poison, [[FOR_BODY_LR_PH]] ], [ [[TEMP_1_LCSSA_PH_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]] ]
-; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_BODY_LR_PH]] ], [ [[I_UNR_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[J_027_UNR:%.*]] = phi i32 [ 1, [[FOR_BODY_LR_PH]] ], [ [[J_027_UNR_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[TEMP_026_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[TEMP_026_UNR_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD1]], label [[FOR_BODY_EPIL_PREHEADER]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
 ; CHECK:       for.body.epil.preheader:
+; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_BODY_LR_PH]] ], [ [[I_UNR_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]] ]
+; CHECK-NEXT:    [[J_027_UNR:%.*]] = phi i32 [ 1, [[FOR_BODY_LR_PH]] ], [ [[J_027_UNR_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[TEMP_026_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[TEMP_026_UNR_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD]])
 ; CHECK-NEXT:    br label [[FOR_BODY_EPIL:%.*]]
 ; CHECK:       for.body.epil:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[CONDITION:%.*]], i32 [[J_027_UNR]]
@@ -450,7 +448,7 @@ define void @test_four_blocks(ptr nocapture %Output, ptr nocapture readonly %Con
 ; CHECK-NEXT:    [[TEMP_1_LCSSA_PH1:%.*]] = phi i32 [ [[TEMP_1_EPIL]], [[FOR_INC_EPIL]] ], [ [[TEMP_1_EPIL_1]], [[FOR_INC_EPIL_1]] ], [ [[TEMP_1_EPIL_2]], [[FOR_INC_EPIL_2]] ]
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT]]
 ; CHECK:       for.cond.cleanup.loopexit:
-; CHECK-NEXT:    [[TEMP_1_LCSSA:%.*]] = phi i32 [ [[TEMP_1_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[TEMP_1_LCSSA_PH1]], [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    [[TEMP_1_LCSSA:%.*]] = phi i32 [ [[TEMP_1_LCSSA_PH_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[TEMP_1_LCSSA_PH1]], [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ]
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    [[TEMP_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TEMP_1_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
@@ -551,7 +549,7 @@ define void @test_four_blocks(ptr nocapture %Output, ptr nocapture readonly %Con
 ; CHECK-NEXT:    [[INC_3]] = add nuw i32 [[J_027]], 4
 ; CHECK-NEXT:    [[NITER_NEXT_3]] = add i32 [[NITER]], 4
 ; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY]]
 ;
 entry:
   %cmp25 = icmp ugt i32 %MaxJ, 1
diff --git a/llvm/test/Transforms/LoopUnroll/Hexagon/reuse-lcssa-phi-scev-expansion.ll b/llvm/test/Transforms/LoopUnroll/Hexagon/reuse-lcssa-phi-scev-expansion.ll
index f74fb14..8edc133 100644
--- a/llvm/test/Transforms/LoopUnroll/Hexagon/reuse-lcssa-phi-scev-expansion.ll
+++ b/llvm/test/Transforms/LoopUnroll/Hexagon/reuse-lcssa-phi-scev-expansion.ll
@@ -29,7 +29,7 @@ define void @preserve_lcssa_when_reusing_existing_phi() {
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[TMP1]], 7
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
-; CHECK-NEXT:    br i1 [[TMP3]], label %[[LOOP_1_LATCH_UNR_LCSSA:.*]], label %[[LOOP_4_PREHEADER_NEW:.*]]
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[LOOP_4_EPIL_PREHEADER:.*]], label %[[LOOP_4_PREHEADER_NEW:.*]]
 ; CHECK:       [[LOOP_4_PREHEADER_NEW]]:
 ; CHECK-NEXT:    br label %[[LOOP_4:.*]]
 ; CHECK:       [[LOOP_2_LATCH]]:
@@ -47,18 +47,18 @@ define void @preserve_lcssa_when_reusing_existing_phi() {
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    [[INC_I_7]] = add nuw nsw i32 [[IV_4]], 8
 ; CHECK-NEXT:    [[NITER_NEXT_7]] = add nuw nsw i32 [[NITER]], 8
-; CHECK-NEXT:    br i1 true, label %[[LOOP_1_LATCH_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_4]]
-; CHECK:       [[LOOP_1_LATCH_UNR_LCSSA_LOOPEXIT]]:
-; CHECK-NEXT:    [[IV_4_UNR_PH:%.*]] = phi i32 [ [[INC_I_7]], %[[LOOP_4]] ]
-; CHECK-NEXT:    br label %[[LOOP_1_LATCH_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 true, label %[[LOOP_1_LATCH_UNR_LCSSA:.*]], label %[[LOOP_4]]
 ; CHECK:       [[LOOP_1_LATCH_UNR_LCSSA]]:
-; CHECK-NEXT:    [[IV_4_UNR:%.*]] = phi i32 [ 0, %[[LOOP_4_PREHEADER]] ], [ [[IV_4_UNR_PH]], %[[LOOP_1_LATCH_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[IV_4_UNR:%.*]] = phi i32 [ [[INC_I_7]], %[[LOOP_4]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_4_EPIL_PREHEADER:.*]], label %[[LOOP_1_LATCH:.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_4_EPIL_PREHEADER]], label %[[LOOP_1_LATCH:.*]]
 ; CHECK:       [[LOOP_4_EPIL_PREHEADER]]:
+; CHECK-NEXT:    [[IV_4_EPIL_INIT:%.*]] = phi i32 [ 0, %[[LOOP_4_PREHEADER]] ], [ [[IV_4_UNR]], %[[LOOP_1_LATCH_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD2]])
 ; CHECK-NEXT:    br label %[[LOOP_4_EPIL:.*]]
 ; CHECK:       [[LOOP_4_EPIL]]:
-; CHECK-NEXT:    [[IV_4_EPIL:%.*]] = phi i32 [ [[INC_I_EPIL:%.*]], %[[LOOP_4_EPIL]] ], [ [[IV_4_UNR]], %[[LOOP_4_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    [[IV_4_EPIL:%.*]] = phi i32 [ [[INC_I_EPIL:%.*]], %[[LOOP_4_EPIL]] ], [ [[IV_4_EPIL_INIT]], %[[LOOP_4_EPIL_PREHEADER]] ]
 ; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i32 [ 0, %[[LOOP_4_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_4_EPIL]] ]
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    [[INC_I_EPIL]] = add i32 [[IV_4_EPIL]], 1
diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
index 456875e..5d08e9d 100644
--- a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
@@ -51,16 +51,16 @@ define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT:%.*]], label [[VECTOR_BODY]]
-; CHECK:       middle.block.unr-lcssa.loopexit:
+; CHECK:       middle.block.unr-lcssa:
 ; CHECK-NEXT:    [[INDEX_UNR_PH:%.*]] = phi i64 [ [[INDEX_NEXT_1]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND12_UNR_PH:%.*]] = phi <16 x i32> [ [[VEC_IND_NEXT13_1]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    br label [[MIDDLE_BLOCK_UNR_LCSSA]]
-; CHECK:       middle.block.unr-lcssa:
-; CHECK-NEXT:    [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[VEC_IND12_UNR:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND12_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[MIDDLE_BLOCK_UNR_LCSSA]], label [[MIDDLE_BLOCK:%.*]]
 ; CHECK:       vector.body.epil.preheader:
+; CHECK-NEXT:    [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[VEC_IND12_UNR:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND12_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label [[VECTOR_BODY_EPIL:%.*]]
 ; CHECK:       vector.body.epil:
 ; CHECK-NEXT:    [[TMP14:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12_UNR]]
diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
index cd4198f..03277fc 100644
--- a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
@@ -51,16 +51,16 @@ define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT:%.*]], label [[VECTOR_BODY]]
-; CHECK:       middle.block.unr-lcssa.loopexit:
+; CHECK:       middle.block.unr-lcssa:
 ; CHECK-NEXT:    [[INDEX_UNR_PH:%.*]] = phi i64 [ [[INDEX_NEXT_1]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND12_UNR_PH:%.*]] = phi <16 x i32> [ [[VEC_IND_NEXT13_1]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    br label [[MIDDLE_BLOCK_UNR_LCSSA]]
-; CHECK:       middle.block.unr-lcssa:
-; CHECK-NEXT:    [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[VEC_IND12_UNR:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND12_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[MIDDLE_BLOCK_UNR_LCSSA]], label [[MIDDLE_BLOCK:%.*]]
 ; CHECK:       vector.body.epil.preheader:
+; CHECK-NEXT:    [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[VEC_IND12_UNR:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND12_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label [[VECTOR_BODY_EPIL:%.*]]
 ; CHECK:       vector.body.epil:
 ; CHECK-NEXT:    [[TMP14:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12_UNR]]
diff --git a/llvm/test/Transforms/LoopUnroll/RISCV/vector.ll b/llvm/test/Transforms/LoopUnroll/RISCV/vector.ll
index 811d055..b575057 100644
--- a/llvm/test/Transforms/LoopUnroll/RISCV/vector.ll
+++ b/llvm/test/Transforms/LoopUnroll/RISCV/vector.ll
@@ -26,7 +26,7 @@ define void @reverse(ptr %dst, ptr %src, i64 %len) {
 ; SIFIVE-NEXT:    [[TMP2:%.*]] = add i64 [[LEN]], -1
 ; SIFIVE-NEXT:    [[XTRAITER:%.*]] = and i64 [[LEN]], 7
 ; SIFIVE-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 7
-; SIFIVE-NEXT:    br i1 [[TMP3]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; SIFIVE-NEXT:    br i1 [[TMP3]], label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
 ; SIFIVE:       [[ENTRY_NEW]]:
 ; SIFIVE-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[LEN]], [[XTRAITER]]
 ; SIFIVE-NEXT:    br label %[[FOR_BODY:.*]]
@@ -83,15 +83,15 @@ define void @reverse(ptr %dst, ptr %src, i64 %len) {
 ; SIFIVE-NEXT:    [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
 ; SIFIVE-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
 ; SIFIVE-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; SIFIVE-NEXT:    br i1 [[NITER_NCMP_7]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_BODY]]
-; SIFIVE:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
-; SIFIVE-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_7]], %[[FOR_BODY]] ]
-; SIFIVE-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; SIFIVE-NEXT:    br i1 [[NITER_NCMP_7]], label %[[EXIT_UNR_LCSSA:.*]], label %[[FOR_BODY]]
 ; SIFIVE:       [[EXIT_UNR_LCSSA]]:
-; SIFIVE-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; SIFIVE-NEXT:    [[IV_UNR1:%.*]] = phi i64 [ [[IV_NEXT_7]], %[[FOR_BODY]] ]
 ; SIFIVE-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; SIFIVE-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
+; SIFIVE-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_BODY_EPIL_PREHEADER]], label %[[EXIT:.*]]
 ; SIFIVE:       [[FOR_BODY_EPIL_PREHEADER]]:
+; SIFIVE-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR1]], %[[EXIT_UNR_LCSSA]] ]
+; SIFIVE-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; SIFIVE-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; SIFIVE-NEXT:    br label %[[FOR_BODY_EPIL:.*]]
 ; SIFIVE:       [[FOR_BODY_EPIL]]:
 ; SIFIVE-NEXT:    [[TMP18:%.*]] = sub nsw i64 [[LEN]], [[IV_UNR]]
diff --git a/llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll b/llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll
index ea499e5..b456ad8 100644
--- a/llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll
+++ b/llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll
@@ -124,15 +124,17 @@ define hidden void @runtime(ptr nocapture %a, ptr nocapture readonly %b, ptr noc
 ; CHECK:       for.body.preheader:
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[N]], 1
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[N]], 1
-; CHECK-NEXT:    br i1 [[TMP0]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
 ; CHECK:       for.body.preheader.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = and i32 [[N]], -2
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup.loopexit.unr-lcssa:
-; CHECK-NEXT:    [[I_09_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_EPIL:%.*]]
-; CHECK:       for.body.epil:
+; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_EPIL_PREHEADER]]
+; CHECK:       for.body.epil.preheader:
+; CHECK-NEXT:    [[I_09_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INC_1:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[I_09_UNR]]
 ; CHECK-NEXT:    [[I_EPIL:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX1_EPIL:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i32 [[I_09_UNR]]
diff --git a/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll b/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll
index 7fd4eb1..6e600d2 100644
--- a/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll
+++ b/llvm/test/Transforms/LoopUnroll/convergent.controlled.ll
@@ -302,7 +302,7 @@ define i32 @pragma_unroll_with_remainder(i32 %n) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[TMP0]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 1
-; CHECK-NEXT:    br i1 [[TMP2]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[L3_EPIL_PREHEADER:%.*]], label [[ENTRY_NEW:%.*]]
 ; CHECK:       entry.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[TMP0]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[L3:%.*]], !llvm.loop [[LOOP4]]
@@ -316,13 +316,13 @@ define i32 @pragma_unroll_with_remainder(i32 %n) {
 ; CHECK-NEXT:    [[INC_1]] = add nsw i32 [[X_0]], 2
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i32 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i32 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[L3]], !llvm.loop [[LOOP8:![0-9]+]]
-; CHECK:       exit.unr-lcssa.loopexit:
-; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA:%.*]], label [[L3]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       exit.unr-lcssa:
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[L3_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[L3_EPIL_PREHEADER]], label [[EXIT:%.*]]
 ; CHECK:       l3.epil.preheader:
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label [[L3_EPIL:%.*]]
 ; CHECK:       l3.epil:
 ; CHECK-NEXT:    [[TOK_LOOP_EPIL:%.*]] = call token @llvm.experimental.convergence.anchor()
diff --git a/llvm/test/Transforms/LoopUnroll/followup.ll b/llvm/test/Transforms/LoopUnroll/followup.ll
index e4ae7b6..051e43d 100644
--- a/llvm/test/Transforms/LoopUnroll/followup.ll
+++ b/llvm/test/Transforms/LoopUnroll/followup.ll
@@ -43,7 +43,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; COUNT: ![[LOOP]] = distinct !{![[LOOP]], ![[FOLLOWUP_ALL]], ![[FOLLOWUP_UNROLLED]]}
 
 
-; EPILOG: br i1 %niter.ncmp.7, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !llvm.loop ![[LOOP_0:[0-9]+]]
+; EPILOG: br i1 %niter.ncmp.7, label %for.end.loopexit.unr-lcssa, label %for.body, !llvm.loop ![[LOOP_0:[0-9]+]]
 ; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop ![[LOOP_2:[0-9]+]]
 
 ; EPILOG: ![[LOOP_0]] = distinct !{![[LOOP_0]], ![[FOLLOWUP_ALL:[0-9]+]], ![[FOLLOWUP_UNROLLED:[0-9]+]]}
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll b/llvm/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll
index 835fc2f..ee28aa1 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll
@@ -3,9 +3,7 @@
 ; Test that epilogue is tagged with the same debug information as original loop body rather than original loop exit.
 
 ; CHECK: for.body.i:
-; CHECK:   br i1 {{.*}}, label %lee1.exit.loopexit.unr-lcssa.loopexit, label %for.body.i, !dbg ![[LOOP_LOC:[0-9]+]]
-; CHECK: lee1.exit.loopexit.unr-lcssa.loopexit:
-; CHECK:   br label %lee1.exit.loopexit.unr-lcssa, !dbg ![[LOOP_LOC]]
+; CHECK:   br i1 {{.*}}, label %lee1.exit.loopexit.unr-lcssa, label %for.body.i, !dbg ![[LOOP_LOC:[0-9]+]]
 ; CHECK: lee1.exit.loopexit.unr-lcssa:
 ; CHECK:   %lcmp.mod = icmp ne i32 %xtraiter, 0, !dbg ![[LOOP_LOC]]
 ; CHECK:   br i1 %lcmp.mod, label %for.body.i.epil.preheader, label %lee1.exit.loopexit, !dbg ![[LOOP_LOC]]
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
index a97b394..0c52b5a0 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
@@ -20,7 +20,7 @@ define void @pr56282() {
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP1]], 7
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 7
-; CHECK-NEXT:    br i1 [[TMP3]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[OUTER_HEADER_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[INNER_1_HEADER_EPIL_PREHEADER:%.*]], label [[OUTER_HEADER_NEW:%.*]]
 ; CHECK:       outer.header.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TMP1]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[INNER_1_HEADER:%.*]]
@@ -62,17 +62,16 @@ define void @pr56282() {
 ; CHECK:       inner.1.latch.7:
 ; CHECK-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
 ; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp ne i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[INNER_1_HEADER]], label [[OUTER_MIDDLE_UNR_LCSSA_LOOPEXIT:%.*]]
-; CHECK:       outer.middle.unr-lcssa.loopexit:
+; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[INNER_1_HEADER]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]]
+; CHECK:       outer.middle.unr-lcssa:
 ; CHECK-NEXT:    [[V_LCSSA1_PH_PH:%.*]] = phi i32 [ [[V_7]], [[INNER_1_LATCH_7]] ]
 ; CHECK-NEXT:    [[INNER_1_IV_UNR_PH:%.*]] = phi i64 [ [[INNER_1_IV_NEXT_7]], [[INNER_1_LATCH_7]] ]
-; CHECK-NEXT:    br label [[OUTER_MIDDLE_UNR_LCSSA]]
-; CHECK:       outer.middle.unr-lcssa:
-; CHECK-NEXT:    [[V_LCSSA1_PH:%.*]] = phi i32 [ poison, [[OUTER_HEADER]] ], [ [[V_LCSSA1_PH_PH]], [[OUTER_MIDDLE_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[INNER_1_IV_UNR:%.*]] = phi i64 [ 0, [[OUTER_HEADER]] ], [ [[INNER_1_IV_UNR_PH]], [[OUTER_MIDDLE_UNR_LCSSA_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[INNER_1_HEADER_EPIL_PREHEADER:%.*]], label [[OUTER_MIDDLE:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[INNER_1_HEADER_EPIL_PREHEADER]], label [[OUTER_MIDDLE:%.*]]
 ; CHECK:       inner.1.header.epil.preheader:
+; CHECK-NEXT:    [[INNER_1_IV_UNR:%.*]] = phi i64 [ 0, [[OUTER_HEADER]] ], [ [[INNER_1_IV_UNR_PH]], [[OUTER_MIDDLE_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD3:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD3]])
 ; CHECK-NEXT:    br label [[INNER_1_HEADER_EPIL:%.*]]
 ; CHECK:       inner.1.header.epil:
 ; CHECK-NEXT:    [[INNER_1_IV_EPIL:%.*]] = phi i64 [ [[INNER_1_IV_UNR]], [[INNER_1_HEADER_EPIL_PREHEADER]] ], [ [[INNER_1_IV_NEXT_EPIL:%.*]], [[INNER_1_LATCH_EPIL:%.*]] ]
@@ -90,7 +89,7 @@ define void @pr56282() {
 ; CHECK-NEXT:    [[V_LCSSA1_PH2:%.*]] = phi i32 [ [[V_EPIL]], [[INNER_1_LATCH_EPIL]] ]
 ; CHECK-NEXT:    br label [[OUTER_MIDDLE]]
 ; CHECK:       outer.middle:
-; CHECK-NEXT:    [[V_LCSSA1:%.*]] = phi i32 [ [[V_LCSSA1_PH]], [[OUTER_MIDDLE_UNR_LCSSA]] ], [ [[V_LCSSA1_PH2]], [[OUTER_MIDDLE_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    [[V_LCSSA1:%.*]] = phi i32 [ [[V_LCSSA1_PH_PH]], [[OUTER_MIDDLE_UNR_LCSSA]] ], [ [[V_LCSSA1_PH2]], [[OUTER_MIDDLE_EPILOG_LCSSA]] ]
 ; CHECK-NEXT:    [[C_3:%.*]] = icmp ugt i32 [[V_LCSSA1]], 0
 ; CHECK-NEXT:    br i1 [[C_3]], label [[INNER_2_PREHEADER:%.*]], label [[EXIT:%.*]]
 ; CHECK:       inner.2.preheader:
@@ -102,7 +101,7 @@ define void @pr56282() {
 ; CHECK-NEXT:    ret void
 ; CHECK:       exit.deopt.loopexit:
 ; CHECK-NEXT:    br label [[EXIT_DEOPT:%.*]]
-; CHECK:       exit.deopt.loopexit3:
+; CHECK:       exit.deopt.loopexit4:
 ; CHECK-NEXT:    br label [[EXIT_DEOPT]]
 ; CHECK:       exit.deopt:
 ; CHECK-NEXT:    call void (...) @llvm.experimental.deoptimize.isVoid(i32 0) [ "deopt"() ]
@@ -233,7 +232,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
 ; CHECK-NEXT:    store i32 [[L_1_7]], ptr [[DST]], align 8
 ; CHECK-NEXT:    [[INNER_1_IV_NEXT_7]] = add i64 [[INNER_1_IV]], 8
 ; CHECK-NEXT:    [[CMP_2_7:%.*]] = icmp sgt i64 [[INNER_1_IV_NEXT_6]], 0
-; CHECK-NEXT:    br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF6:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK:       outer.middle.unr-lcssa:
 ; CHECK-NEXT:    [[L_1_LCSSA_PH:%.*]] = phi i32 [ [[L_1_7]], [[INNER_1_LATCH_7]] ]
 ; CHECK-NEXT:    br label [[OUTER_MIDDLE]]
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-i128.ll b/llvm/test/Transforms/LoopUnroll/runtime-i128.ll
index 4cd8e7c..fec8626 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-i128.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-i128.ll
@@ -11,7 +11,7 @@ define void @test(i128 %n, i128 %m) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i128 [[TMP0]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i128 [[TMP0]], 7
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i128 [[TMP1]], 7
-; CHECK-NEXT:    br i1 [[TMP2]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[ENTRY_NEW:%.*]]
 ; CHECK:       entry.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i128 [[TMP0]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
@@ -29,18 +29,18 @@ define void @test(i128 %n, i128 %m) {
 ; CHECK-NEXT:    [[IV_NEXT_7]] = add i128 [[IV]], 8
 ; CHECK-NEXT:    [[NITER_NEXT_7]] = add i128 [[NITER]], 8
 ; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp ne i128 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[LOOP]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]]
-; CHECK:       exit.unr-lcssa.loopexit:
-; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i128 [ [[IV_NEXT_7]], [[LOOP]] ]
-; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[LOOP]], label [[EXIT_UNR_LCSSA:%.*]]
 ; CHECK:       exit.unr-lcssa:
-; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i128 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i128 [ [[IV_NEXT_7]], [[LOOP]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i128 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER]], label [[EXIT:%.*]]
 ; CHECK:       loop.epil.preheader:
+; CHECK-NEXT:    [[IV_EPIL_INIT:%.*]] = phi i128 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR]], [[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i128 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label [[LOOP_EPIL:%.*]]
 ; CHECK:       loop.epil:
-; CHECK-NEXT:    [[IV_EPIL:%.*]] = phi i128 [ [[IV_UNR]], [[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], [[LOOP_EPIL]] ]
+; CHECK-NEXT:    [[IV_EPIL:%.*]] = phi i128 [ [[IV_EPIL_INIT]], [[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], [[LOOP_EPIL]] ]
 ; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i128 [ 0, [[LOOP_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[LOOP_EPIL]] ]
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    [[IV_NEXT_EPIL]] = add i128 [[IV_EPIL]], 1
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll
index 8472a8c..85de29d 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll
@@ -9,7 +9,7 @@ define i32 @test(ptr nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], -1
 ; ENABLED-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
 ; ENABLED-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
-; ENABLED-NEXT:    br i1 [[TMP2]], label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; ENABLED-NEXT:    br i1 [[TMP2]], label [[HEADER_EPIL_PREHEADER:%.*]], label [[ENTRY_NEW:%.*]]
 ; ENABLED:       entry.new:
 ; ENABLED-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
 ; ENABLED-NEXT:    br label [[HEADER:%.*]]
@@ -71,23 +71,22 @@ define i32 @test(ptr nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
 ; ENABLED-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
 ; ENABLED-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; ENABLED-NEXT:    br i1 [[NITER_NCMP_7]], label [[FOR_END_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
-; ENABLED:       for.end.unr-lcssa.loopexit:
+; ENABLED-NEXT:    br i1 [[NITER_NCMP_7]], label [[FOR_END_UNR_LCSSA:%.*]], label [[HEADER]]
+; ENABLED:       for.end.unr-lcssa:
 ; ENABLED-NEXT:    [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[FOR_BODY_7]] ]
 ; ENABLED-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[FOR_BODY_7]] ]
 ; ENABLED-NEXT:    [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[FOR_BODY_7]] ]
-; ENABLED-NEXT:    br label [[FOR_END_UNR_LCSSA]]
-; ENABLED:       for.end.unr-lcssa:
-; ENABLED-NEXT:    [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[SUM_0_LCSSA_PH_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
-; ENABLED-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
-; ENABLED-NEXT:    [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
 ; ENABLED-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; ENABLED-NEXT:    br i1 [[LCMP_MOD]], label [[HEADER_EPIL_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; ENABLED-NEXT:    br i1 [[LCMP_MOD]], label [[HEADER_EPIL_PREHEADER]], label [[FOR_END:%.*]]
 ; ENABLED:       header.epil.preheader:
+; ENABLED-NEXT:    [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_END_UNR_LCSSA]] ]
+; ENABLED-NEXT:    [[SUM_02_EPIL_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[FOR_END_UNR_LCSSA]] ]
+; ENABLED-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; ENABLED-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD2]])
 ; ENABLED-NEXT:    br label [[HEADER_EPIL:%.*]]
 ; ENABLED:       header.epil:
-; ENABLED-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
-; ENABLED-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; ENABLED-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL:%.*]] ], [ [[INDVARS_IV_EPIL_INIT]], [[HEADER_EPIL_PREHEADER]] ]
+; ENABLED-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[SUM_02_EPIL_INIT]], [[HEADER_EPIL_PREHEADER]] ]
 ; ENABLED-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ 0, [[HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[FOR_BODY_EPIL]] ]
 ; ENABLED-NEXT:    [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42
 ; ENABLED-NEXT:    br i1 [[CMP_EPIL]], label [[FOR_EXIT2_LOOPEXIT2:%.*]], label [[FOR_BODY_EPIL]]
@@ -104,12 +103,12 @@ define i32 @test(ptr nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[SUM_0_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ]
 ; ENABLED-NEXT:    br label [[FOR_END]]
 ; ENABLED:       for.end:
-; ENABLED-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[FOR_END_UNR_LCSSA]] ], [ [[SUM_0_LCSSA_PH1]], [[FOR_END_EPILOG_LCSSA]] ]
+; ENABLED-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH_PH]], [[FOR_END_UNR_LCSSA]] ], [ [[SUM_0_LCSSA_PH1]], [[FOR_END_EPILOG_LCSSA]] ]
 ; ENABLED-NEXT:    ret i32 [[SUM_0_LCSSA]]
 ; ENABLED:       for.exit2.loopexit:
 ; ENABLED-NEXT:    [[RETVAL_PH:%.*]] = phi i32 [ [[SUM_02]], [[HEADER]] ], [ [[ADD]], [[FOR_BODY]] ], [ [[ADD_1]], [[FOR_BODY_1]] ], [ [[ADD_2]], [[FOR_BODY_2]] ], [ [[ADD_3]], [[FOR_BODY_3]] ], [ [[ADD_4]], [[FOR_BODY_4]] ], [ [[ADD_5]], [[FOR_BODY_5]] ], [ [[ADD_6]], [[FOR_BODY_6]] ]
 ; ENABLED-NEXT:    br label [[FOR_EXIT2:%.*]]
-; ENABLED:       for.exit2.loopexit2:
+; ENABLED:       for.exit2.loopexit3:
 ; ENABLED-NEXT:    [[RETVAL_PH3:%.*]] = phi i32 [ [[SUM_02_EPIL]], [[HEADER_EPIL]] ]
 ; ENABLED-NEXT:    br label [[FOR_EXIT2]]
 ; ENABLED:       for.exit2:
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll
index 6e3bbe1..2617199 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll
@@ -3,7 +3,7 @@
 ;; Check that the remainder loop is properly assigned a branch weight for its latch branch.
 ; CHECK-LABEL: @test(
 ; CHECK-LABEL: for.body:
-; CHECK: br i1 [[COND1:%.*]], label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !prof ![[#PROF:]], !llvm.loop ![[#LOOP:]]
+; CHECK: br i1 [[COND1:%.*]], label %for.end.loopexit.unr-lcssa, label %for.body, !prof ![[#PROF:]], !llvm.loop ![[#LOOP:]]
 ; CHECK-LABEL: for.body.epil:
 ; CHECK: br i1 [[COND2:%.*]], label  %for.body.epil, label %for.end.loopexit.epilog-lcssa, !prof ![[#PROF2:]], !llvm.loop ![[#LOOP2:]]
 ; CHECK: ![[#PROF]] = !{!"branch_weights", i32 1, i32 2499}
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
index 5f6e66e..6835e9b 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
@@ -15,7 +15,7 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-NEXT:    %xtraiter = and i64 %0, 7
 ; EPILOG-NEXT:    %2 = icmp ult i64 %1, 7
-; EPILOG-NEXT:    br i1 %2, label %exit2.loopexit.unr-lcssa, label %entry.new
+; EPILOG-NEXT:    br i1 %2, label %loop_header.epil.preheader, label %entry.new
 ; EPILOG:       entry.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-NEXT:    br label %loop_header
@@ -29,7 +29,7 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    br i1 false, label %loop_latch, label %exit3.loopexit
 ; EPILOG:       exit3.loopexit:
 ; EPILOG-NEXT:    br label %exit3
-; EPILOG:       exit3.loopexit2:
+; EPILOG:       exit3.loopexit3:
 ; EPILOG-NEXT:    br label %exit3
 ; EPILOG:       exit3:
 ; EPILOG-NEXT:    ret void
@@ -79,30 +79,30 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    %iv_next.7 = add i64 %iv, 8
 ; EPILOG-NEXT:    %niter.next.7 = add i64 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit2.loopexit.unr-lcssa.loopexit
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit2.loopexit.unr-lcssa
 ; EPILOG:       exit1.loopexit:
 ; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1.loopexit1:
+; EPILOG:       exit1.loopexit2:
 ; EPILOG-NEXT:    br label %exit1
 ; EPILOG:       exit1:
 ; EPILOG-NEXT:    ret void
-; EPILOG:       exit2.loopexit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    br label %exit2.loopexit.unr-lcssa
 ; EPILOG:       exit2.loopexit.unr-lcssa:
-; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %iv.unr = phi i64 [ %iv_next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2.loopexit
 ; EPILOG:       loop_header.epil.preheader:
+; EPILOG-NEXT:    %iv.epil.init = phi i64 [ 0, %entry ], [ %iv.unr, %exit2.loopexit.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-NEXT:    br label %loop_header.epil
 ; EPILOG:       loop_header.epil:
-; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.epil.init, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
 ; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %loop_header.epil.preheader ], [ %epil.iter.next, %loop_latch.epil ]
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %loop_exiting_bb1.epil
 ; EPILOG:       loop_exiting_bb1.epil:
-; EPILOG-NEXT:    br i1 false, label %loop_exiting_bb2.epil, label %exit1.loopexit1
+; EPILOG-NEXT:    br i1 false, label %loop_exiting_bb2.epil, label %exit1.loopexit2
 ; EPILOG:       loop_exiting_bb2.epil:
-; EPILOG-NEXT:    br i1 false, label %loop_latch.epil, label %exit3.loopexit2
+; EPILOG-NEXT:    br i1 false, label %loop_latch.epil, label %exit3.loopexit3
 ; EPILOG:       loop_latch.epil:
 ; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
 ; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
@@ -120,7 +120,7 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
 ; EPILOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %2, label %exit2.loopexit.unr-lcssa, label %entry.new
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %loop_header.epil.preheader, label %entry.new
 ; EPILOG-BLOCK:       entry.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %loop_header
@@ -146,17 +146,17 @@ define void @test1(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv, 2
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.loopexit.unr-lcssa.loopexit, !llvm.loop !0
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.loopexit.unr-lcssa, !llvm.loop !0
 ; EPILOG-BLOCK:       exit1.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit1
 ; EPILOG-BLOCK:       exit1:
 ; EPILOG-BLOCK-NEXT:    ret void
-; EPILOG-BLOCK:       exit2.loopexit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    br label %exit2.loopexit.unr-lcssa
 ; EPILOG-BLOCK:       exit2.loopexit.unr-lcssa:
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2.loopexit
 ; EPILOG-BLOCK:       loop_header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-BLOCK-NEXT:    br label %loop_header.epil
 ; EPILOG-BLOCK:       loop_header.epil:
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.epil, label %loop_exiting_bb1.epil
@@ -366,7 +366,7 @@ define i32 @test2(ptr nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-NEXT:    %xtraiter = and i64 %0, 7
 ; EPILOG-NEXT:    %2 = icmp ult i64 %1, 7
-; EPILOG-NEXT:    br i1 %2, label %for.end.unr-lcssa, label %entry.new
+; EPILOG-NEXT:    br i1 %2, label %header.epil.preheader, label %entry.new
 ; EPILOG:       entry.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-NEXT:    br label %header
@@ -448,28 +448,27 @@ define i32 @test2(ptr nocapture %a, i64 %n) {
 ; EPILOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv, 8
 ; EPILOG-NEXT:    %niter.next.7 = add i64 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %for.end.unr-lcssa.loopexit, label %header
-; EPILOG:       for.end.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %sum.0.lcssa.ph.ph = phi i32 [ %add.7, %for.body.7 ]
-; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %for.body.7 ]
-; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %for.body.7 ]
-; EPILOG-NEXT:    br label %for.end.unr-lcssa
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %for.end.unr-lcssa, label %header
 ; EPILOG:       for.end.unr-lcssa:
-; EPILOG-NEXT:    %sum.0.lcssa.ph = phi i32 [ poison, %entry ], [ %sum.0.lcssa.ph.ph, %for.end.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %for.end.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %for.end.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %sum.0.lcssa.ph = phi i32 [ %add.7, %for.body.7 ]
+; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ %indvars.iv.next.7, %for.body.7 ]
+; EPILOG-NEXT:    %sum.02.unr = phi i32 [ %add.7, %for.body.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %for.end
 ; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    %indvars.iv.epil.init = phi i64 [ 0, %entry ], [ %indvars.iv.unr, %for.end.unr-lcssa ]
+; EPILOG-NEXT:    %sum.02.epil.init = phi i32 [ 0, %entry ], [ %sum.02.unr, %for.end.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod2 = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod2)
 ; EPILOG-NEXT:    br label %header.epil
 ; EPILOG:       header.epil:
-; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %for.body.epil ], [ %sum.02.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.epil.init, %header.epil.preheader ]
+; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %for.body.epil ], [ %sum.02.epil.init, %header.epil.preheader ]
 ; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %header.epil.preheader ], [ %epil.iter.next, %for.body.epil ]
-; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit2, label %for.exiting_block.epil
+; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit3, label %for.exiting_block.epil
 ; EPILOG:       for.exiting_block.epil:
 ; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
-; EPILOG-NEXT:    br i1 %cmp.epil, label %for.exit2.loopexit2, label %for.body.epil
+; EPILOG-NEXT:    br i1 %cmp.epil, label %for.exit2.loopexit3, label %for.body.epil
 ; EPILOG:       for.body.epil:
 ; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.epil
 ; EPILOG-NEXT:    %11 = load i32, ptr %arrayidx.epil, align 4
@@ -488,11 +487,11 @@ define i32 @test2(ptr nocapture %a, i64 %n) {
 ; EPILOG:       for.exit2.loopexit:
 ; EPILOG-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %for.body.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %for.body.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %for.body.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %for.body.6 ], [ 42, %for.exiting_block.7 ]
 ; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2.loopexit2:
-; EPILOG-NEXT:    %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
+; EPILOG:       for.exit2.loopexit3:
+; EPILOG-NEXT:    %retval.ph4 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
 ; EPILOG-NEXT:    br label %for.exit2
 ; EPILOG:       for.exit2:
-; EPILOG-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
+; EPILOG-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph4, %for.exit2.loopexit3 ]
 ; EPILOG-NEXT:    ret i32 %retval
 ;
 ; EPILOG-BLOCK-LABEL: @test2(
@@ -501,7 +500,7 @@ define i32 @test2(ptr nocapture %a, i64 %n) {
 ; EPILOG-BLOCK-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
 ; EPILOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %2, label %for.end.unr-lcssa, label %entry.new
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %header.epil.preheader, label %entry.new
 ; EPILOG-BLOCK:       entry.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %header
@@ -529,19 +528,18 @@ define i32 @test2(ptr nocapture %a, i64 %n) {
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv, 2
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %for.end.unr-lcssa.loopexit, label %header, !llvm.loop !2
-; EPILOG-BLOCK:       for.end.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    %sum.0.lcssa.ph.ph = phi i32 [ %add.1, %for.body.1 ]
-; EPILOG-BLOCK-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.1, %for.body.1 ]
-; EPILOG-BLOCK-NEXT:    %sum.02.unr.ph = phi i32 [ %add.1, %for.body.1 ]
-; EPILOG-BLOCK-NEXT:    br label %for.end.unr-lcssa
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %for.end.unr-lcssa, label %header, !llvm.loop !2
 ; EPILOG-BLOCK:       for.end.unr-lcssa:
-; EPILOG-BLOCK-NEXT:    %sum.0.lcssa.ph = phi i32 [ poison, %entry ], [ %sum.0.lcssa.ph.ph, %for.end.unr-lcssa.loopexit ]
-; EPILOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %for.end.unr-lcssa.loopexit ]
-; EPILOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %for.end.unr-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %sum.0.lcssa.ph = phi i32 [ %add.1, %for.body.1 ]
+; EPILOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ %indvars.iv.next.1, %for.body.1 ]
+; EPILOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ %add.1, %for.body.1 ]
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %for.end
 ; EPILOG-BLOCK:       header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %indvars.iv.epil.init = phi i64 [ 0, %entry ], [ %indvars.iv.unr, %for.end.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %sum.02.epil.init = phi i32 [ 0, %entry ], [ %sum.02.unr, %for.end.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %lcmp.mod2 = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod2)
 ; EPILOG-BLOCK-NEXT:    br label %header.epil
 ; EPILOG-BLOCK:       header.epil:
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %for.exit2, label %for.exiting_block.epil
@@ -549,9 +547,9 @@ define i32 @test2(ptr nocapture %a, i64 %n) {
 ; EPILOG-BLOCK-NEXT:    %cmp.epil = icmp eq i64 %n, 42
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %for.exit2, label %for.body.epil
 ; EPILOG-BLOCK:       for.body.epil:
-; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.unr
+; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.epil.init
 ; EPILOG-BLOCK-NEXT:    %5 = load i32, ptr %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %5, %sum.02.unr
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %5, %sum.02.epil.init
 ; EPILOG-BLOCK-NEXT:    br label %for.end
 ; EPILOG-BLOCK:       for.end:
 ; EPILOG-BLOCK-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %for.end.unr-lcssa ], [ %add.epil, %for.body.epil ]
@@ -560,7 +558,7 @@ define i32 @test2(ptr nocapture %a, i64 %n) {
 ; EPILOG-BLOCK-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %for.exit2
 ; EPILOG-BLOCK:       for.exit2:
-; EPILOG-BLOCK-NEXT:    %retval = phi i32 [ %sum.02.unr, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ]
+; EPILOG-BLOCK-NEXT:    %retval = phi i32 [ %sum.02.epil.init, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ]
 ; EPILOG-BLOCK-NEXT:    ret i32 %retval
 ;
 ; PROLOG-LABEL: @test2(
@@ -796,7 +794,7 @@ define void @test3(i64 %trip, i64 %add, i1 %arg) {
 ; EPILOG-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-NEXT:    %xtraiter = and i64 %0, 7
 ; EPILOG-NEXT:    %2 = icmp ult i64 %1, 7
-; EPILOG-NEXT:    br i1 %2, label %exit2.loopexit.unr-lcssa, label %entry.new
+; EPILOG-NEXT:    br i1 %2, label %loop_header.epil.preheader, label %entry.new
 ; EPILOG:       entry.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-NEXT:    br label %loop_header
@@ -812,7 +810,7 @@ define void @test3(i64 %trip, i64 %add, i1 %arg) {
 ; EPILOG-NEXT:    ]
 ; EPILOG:       exit3.loopexit:
 ; EPILOG-NEXT:    br label %exit3
-; EPILOG:       exit3.loopexit2:
+; EPILOG:       exit3.loopexit3:
 ; EPILOG-NEXT:    br label %exit3
 ; EPILOG:       exit3:
 ; EPILOG-NEXT:    ret void
@@ -877,33 +875,33 @@ define void @test3(i64 %trip, i64 %add, i1 %arg) {
 ; EPILOG-NEXT:    %sum.next.7 = add i64 %sum.next.6, %add
 ; EPILOG-NEXT:    %niter.next.7 = add i64 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit2.loopexit.unr-lcssa.loopexit
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit2.loopexit.unr-lcssa
 ; EPILOG:       exit1.loopexit:
 ; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1.loopexit1:
+; EPILOG:       exit1.loopexit2:
 ; EPILOG-NEXT:    br label %exit1
 ; EPILOG:       exit1:
 ; EPILOG-NEXT:    ret void
-; EPILOG:       exit2.loopexit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    %sum.unr.ph = phi i64 [ %sum.next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    br label %exit2.loopexit.unr-lcssa
 ; EPILOG:       exit2.loopexit.unr-lcssa:
-; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %sum.unr = phi i64 [ 0, %entry ], [ %sum.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %iv.unr = phi i64 [ %iv_next.7, %loop_latch.7 ]
+; EPILOG-NEXT:    %sum.unr = phi i64 [ %sum.next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2.loopexit
 ; EPILOG:       loop_header.epil.preheader:
+; EPILOG-NEXT:    %iv.epil.init = phi i64 [ 0, %entry ], [ %iv.unr, %exit2.loopexit.unr-lcssa ]
+; EPILOG-NEXT:    %sum.epil.init = phi i64 [ 0, %entry ], [ %sum.unr, %exit2.loopexit.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-NEXT:    br label %loop_header.epil
 ; EPILOG:       loop_header.epil:
-; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %sum.epil = phi i64 [ %sum.unr, %loop_header.epil.preheader ], [ %sum.next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.epil.init, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %sum.epil = phi i64 [ %sum.epil.init, %loop_header.epil.preheader ], [ %sum.next.epil, %loop_latch.epil ]
 ; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %loop_header.epil.preheader ], [ %epil.iter.next, %loop_latch.epil ]
 ; EPILOG-NEXT:    br i1 %arg, label %loop_latch.epil, label %loop_exiting_bb1.epil
 ; EPILOG:       loop_exiting_bb1.epil:
 ; EPILOG-NEXT:    switch i64 %sum.epil, label %loop_latch.epil [
-; EPILOG-NEXT:      i64 24, label %exit1.loopexit1
-; EPILOG-NEXT:      i64 42, label %exit3.loopexit2
+; EPILOG-NEXT:      i64 24, label %exit1.loopexit2
+; EPILOG-NEXT:      i64 42, label %exit3.loopexit3
 ; EPILOG-NEXT:    ]
 ; EPILOG:       loop_latch.epil:
 ; EPILOG-NEXT:    %iv_next.epil = add nuw nsw i64 %iv.epil, 1
@@ -923,7 +921,7 @@ define void @test3(i64 %trip, i64 %add, i1 %arg) {
 ; EPILOG-BLOCK-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
 ; EPILOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %2, label %exit2.loopexit.unr-lcssa, label %entry.new
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %loop_header.epil.preheader, label %entry.new
 ; EPILOG-BLOCK:       entry.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %loop_header
@@ -954,24 +952,24 @@ define void @test3(i64 %trip, i64 %add, i1 %arg) {
 ; EPILOG-BLOCK-NEXT:    %sum.next.1 = add i64 %sum.next, %add
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.loopexit.unr-lcssa.loopexit, !llvm.loop !3
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.loopexit.unr-lcssa, !llvm.loop !3
 ; EPILOG-BLOCK:       exit1.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit1
 ; EPILOG-BLOCK:       exit1:
 ; EPILOG-BLOCK-NEXT:    ret void
-; EPILOG-BLOCK:       exit2.loopexit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    %sum.unr.ph = phi i64 [ %sum.next.1, %loop_latch.1 ]
-; EPILOG-BLOCK-NEXT:    br label %exit2.loopexit.unr-lcssa
 ; EPILOG-BLOCK:       exit2.loopexit.unr-lcssa:
-; EPILOG-BLOCK-NEXT:    %sum.unr = phi i64 [ 0, %entry ], [ %sum.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %sum.unr = phi i64 [ %sum.next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2.loopexit
 ; EPILOG-BLOCK:       loop_header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %sum.epil.init = phi i64 [ 0, %entry ], [ %sum.unr, %exit2.loopexit.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-BLOCK-NEXT:    br label %loop_header.epil
 ; EPILOG-BLOCK:       loop_header.epil:
 ; EPILOG-BLOCK-NEXT:    br i1 %arg, label %loop_latch.epil, label %loop_exiting_bb1.epil
 ; EPILOG-BLOCK:       loop_exiting_bb1.epil:
-; EPILOG-BLOCK-NEXT:    switch i64 %sum.unr, label %loop_latch.epil [
+; EPILOG-BLOCK-NEXT:    switch i64 %sum.epil.init, label %loop_latch.epil [
 ; EPILOG-BLOCK-NEXT:      i64 24, label %exit1
 ; EPILOG-BLOCK-NEXT:      i64 42, label %exit3
 ; EPILOG-BLOCK-NEXT:    ]
@@ -1204,7 +1202,7 @@ define i32 @hdr_latch_same_exit(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-NEXT:    %xtraiter = and i64 %0, 7
 ; EPILOG-NEXT:    %2 = icmp ult i64 %1, 7
-; EPILOG-NEXT:    br i1 %2, label %latchExit.unr-lcssa, label %entry.new
+; EPILOG-NEXT:    br i1 %2, label %header.epil.preheader, label %entry.new
 ; EPILOG:       entry.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-NEXT:    br label %header
@@ -1286,28 +1284,27 @@ define i32 @hdr_latch_same_exit(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv, 8
 ; EPILOG-NEXT:    %niter.next.7 = add i64 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latchExit.unr-lcssa.loopexit, label %header
-; EPILOG:       latchExit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %result.ph.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %latch.7 ]
-; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    br label %latchExit.unr-lcssa
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latchExit.unr-lcssa, label %header
 ; EPILOG:       latchExit.unr-lcssa:
-; EPILOG-NEXT:    %result.ph = phi i32 [ poison, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %result.ph = phi i32 [ %add.7, %latch.7 ]
+; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ %indvars.iv.next.7, %latch.7 ]
+; EPILOG-NEXT:    %sum.02.unr = phi i32 [ %add.7, %latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
 ; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    %indvars.iv.epil.init = phi i64 [ 0, %entry ], [ %indvars.iv.unr, %latchExit.unr-lcssa ]
+; EPILOG-NEXT:    %sum.02.epil.init = phi i32 [ 0, %entry ], [ %sum.02.unr, %latchExit.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod2 = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod2)
 ; EPILOG-NEXT:    br label %header.epil
 ; EPILOG:       header.epil:
-; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.epil.init, %header.epil.preheader ]
+; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.epil.init, %header.epil.preheader ]
 ; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %header.epil.preheader ], [ %epil.iter.next, %latch.epil ]
-; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit2, label %for.exiting_block.epil
+; EPILOG-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa.loopexit3, label %for.exiting_block.epil
 ; EPILOG:       for.exiting_block.epil:
 ; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
-; EPILOG-NEXT:    br i1 %cmp.epil, label %for.exit2.loopexit4, label %latch.epil
+; EPILOG-NEXT:    br i1 %cmp.epil, label %for.exit2.loopexit5, label %latch.epil
 ; EPILOG:       latch.epil:
 ; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.epil
 ; EPILOG-NEXT:    %11 = load i32, ptr %arrayidx.epil, align 4
@@ -1316,22 +1313,22 @@ define i32 @hdr_latch_same_exit(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
 ; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
 ; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit2, !llvm.loop !4
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit3, !llvm.loop !4
 ; EPILOG:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-NEXT:    %result.ph1.ph = phi i32 [ 0, %header ], [ 0, %latch ], [ 0, %latch.1 ], [ 0, %latch.2 ], [ 0, %latch.3 ], [ 0, %latch.4 ], [ 0, %latch.5 ], [ 0, %latch.6 ]
 ; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
-; EPILOG:       latchExit.epilog-lcssa.loopexit2:
-; EPILOG-NEXT:    %result.ph1.ph3 = phi i32 [ 0, %header.epil ], [ %add.epil, %latch.epil ]
+; EPILOG:       latchExit.epilog-lcssa.loopexit3:
+; EPILOG-NEXT:    %result.ph1.ph4 = phi i32 [ 0, %header.epil ], [ %add.epil, %latch.epil ]
 ; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG:       latchExit.epilog-lcssa:
-; EPILOG-NEXT:    %result.ph1 = phi i32 [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %result.ph1.ph3, %latchExit.epilog-lcssa.loopexit2 ]
+; EPILOG-NEXT:    %result.ph1 = phi i32 [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %result.ph1.ph4, %latchExit.epilog-lcssa.loopexit3 ]
 ; EPILOG-NEXT:    br label %latchExit
 ; EPILOG:       latchExit:
 ; EPILOG-NEXT:    %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
 ; EPILOG-NEXT:    ret i32 %result
 ; EPILOG:       for.exit2.loopexit:
 ; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2.loopexit4:
+; EPILOG:       for.exit2.loopexit5:
 ; EPILOG-NEXT:    br label %for.exit2
 ; EPILOG:       for.exit2:
 ; EPILOG-NEXT:    ret i32 42
@@ -1342,7 +1339,7 @@ define i32 @hdr_latch_same_exit(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
 ; EPILOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %2, label %latchExit.unr-lcssa, label %entry.new
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %header.epil.preheader, label %entry.new
 ; EPILOG-BLOCK:       entry.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %header
@@ -1370,19 +1367,18 @@ define i32 @hdr_latch_same_exit(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv, 2
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !4
-; EPILOG-BLOCK:       latchExit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %sum.02.unr.ph = phi i32 [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    br label %latchExit.unr-lcssa
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa, label %header, !llvm.loop !4
 ; EPILOG-BLOCK:       latchExit.unr-lcssa:
-; EPILOG-BLOCK-NEXT:    %result.ph = phi i32 [ poison, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %result.ph = phi i32 [ %add.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ %indvars.iv.next.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ %add.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
 ; EPILOG-BLOCK:       header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %indvars.iv.epil.init = phi i64 [ 0, %entry ], [ %indvars.iv.unr, %latchExit.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %sum.02.epil.init = phi i32 [ 0, %entry ], [ %sum.02.unr, %latchExit.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %lcmp.mod2 = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod2)
 ; EPILOG-BLOCK-NEXT:    br label %header.epil
 ; EPILOG-BLOCK:       header.epil:
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %latchExit.epilog-lcssa, label %for.exiting_block.epil
@@ -1390,9 +1386,9 @@ define i32 @hdr_latch_same_exit(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %cmp.epil = icmp eq i64 %n, 42
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %for.exit2, label %latch.epil
 ; EPILOG-BLOCK:       latch.epil:
-; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.unr
+; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.epil.init
 ; EPILOG-BLOCK-NEXT:    %5 = load i32, ptr %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %5, %sum.02.unr
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %5, %sum.02.epil.init
 ; EPILOG-BLOCK-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG-BLOCK:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph1.ph = phi i32 [ 0, %header ], [ 0, %latch ]
@@ -1644,7 +1640,7 @@ define i32 @otherblock_latch_same_exit(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-NEXT:    %xtraiter = and i64 %0, 7
 ; EPILOG-NEXT:    %2 = icmp ult i64 %1, 7
-; EPILOG-NEXT:    br i1 %2, label %latchExit.unr-lcssa, label %entry.new
+; EPILOG-NEXT:    br i1 %2, label %header.epil.preheader, label %entry.new
 ; EPILOG:       entry.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-NEXT:    br label %header
@@ -1726,28 +1722,27 @@ define i32 @otherblock_latch_same_exit(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv, 8
 ; EPILOG-NEXT:    %niter.next.7 = add i64 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latchExit.unr-lcssa.loopexit, label %header
-; EPILOG:       latchExit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %result.ph.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %latch.7 ]
-; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    br label %latchExit.unr-lcssa
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latchExit.unr-lcssa, label %header
 ; EPILOG:       latchExit.unr-lcssa:
-; EPILOG-NEXT:    %result.ph = phi i32 [ poison, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %result.ph = phi i32 [ %add.7, %latch.7 ]
+; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ %indvars.iv.next.7, %latch.7 ]
+; EPILOG-NEXT:    %sum.02.unr = phi i32 [ %add.7, %latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
 ; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    %indvars.iv.epil.init = phi i64 [ 0, %entry ], [ %indvars.iv.unr, %latchExit.unr-lcssa ]
+; EPILOG-NEXT:    %sum.02.epil.init = phi i32 [ 0, %entry ], [ %sum.02.unr, %latchExit.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod2 = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod2)
 ; EPILOG-NEXT:    br label %header.epil
 ; EPILOG:       header.epil:
-; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.epil.init, %header.epil.preheader ]
+; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.epil.init, %header.epil.preheader ]
 ; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %header.epil.preheader ], [ %epil.iter.next, %latch.epil ]
-; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit2, label %for.exiting_block.epil
+; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit3, label %for.exiting_block.epil
 ; EPILOG:       for.exiting_block.epil:
 ; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
-; EPILOG-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa.loopexit3, label %latch.epil
+; EPILOG-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa.loopexit4, label %latch.epil
 ; EPILOG:       latch.epil:
 ; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.epil
 ; EPILOG-NEXT:    %11 = load i32, ptr %arrayidx.epil, align 4
@@ -1756,22 +1751,22 @@ define i32 @otherblock_latch_same_exit(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
 ; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
 ; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit3, !llvm.loop !5
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit4, !llvm.loop !5
 ; EPILOG:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-NEXT:    %result.ph1.ph = phi i32 [ 2, %for.exiting_block ], [ 2, %for.exiting_block.1 ], [ 2, %for.exiting_block.2 ], [ 2, %for.exiting_block.3 ], [ 2, %for.exiting_block.4 ], [ 2, %for.exiting_block.5 ], [ 2, %for.exiting_block.6 ], [ 2, %for.exiting_block.7 ]
 ; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
-; EPILOG:       latchExit.epilog-lcssa.loopexit3:
-; EPILOG-NEXT:    %result.ph1.ph4 = phi i32 [ 2, %for.exiting_block.epil ], [ %add.epil, %latch.epil ]
+; EPILOG:       latchExit.epilog-lcssa.loopexit4:
+; EPILOG-NEXT:    %result.ph1.ph5 = phi i32 [ 2, %for.exiting_block.epil ], [ %add.epil, %latch.epil ]
 ; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG:       latchExit.epilog-lcssa:
-; EPILOG-NEXT:    %result.ph1 = phi i32 [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %result.ph1.ph4, %latchExit.epilog-lcssa.loopexit3 ]
+; EPILOG-NEXT:    %result.ph1 = phi i32 [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %result.ph1.ph5, %latchExit.epilog-lcssa.loopexit4 ]
 ; EPILOG-NEXT:    br label %latchExit
 ; EPILOG:       latchExit:
 ; EPILOG-NEXT:    %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
 ; EPILOG-NEXT:    ret i32 %result
 ; EPILOG:       for.exit2.loopexit:
 ; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2.loopexit2:
+; EPILOG:       for.exit2.loopexit3:
 ; EPILOG-NEXT:    br label %for.exit2
 ; EPILOG:       for.exit2:
 ; EPILOG-NEXT:    ret i32 42
@@ -1782,7 +1777,7 @@ define i32 @otherblock_latch_same_exit(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
 ; EPILOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %2, label %latchExit.unr-lcssa, label %entry.new
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %header.epil.preheader, label %entry.new
 ; EPILOG-BLOCK:       entry.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %header
@@ -1810,19 +1805,18 @@ define i32 @otherblock_latch_same_exit(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv, 2
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !5
-; EPILOG-BLOCK:       latchExit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %sum.02.unr.ph = phi i32 [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    br label %latchExit.unr-lcssa
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa, label %header, !llvm.loop !5
 ; EPILOG-BLOCK:       latchExit.unr-lcssa:
-; EPILOG-BLOCK-NEXT:    %result.ph = phi i32 [ poison, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %result.ph = phi i32 [ %add.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ %indvars.iv.next.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ %add.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
 ; EPILOG-BLOCK:       header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %indvars.iv.epil.init = phi i64 [ 0, %entry ], [ %indvars.iv.unr, %latchExit.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %sum.02.epil.init = phi i32 [ 0, %entry ], [ %sum.02.unr, %latchExit.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %lcmp.mod2 = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod2)
 ; EPILOG-BLOCK-NEXT:    br label %header.epil
 ; EPILOG-BLOCK:       header.epil:
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2, label %for.exiting_block.epil
@@ -1830,9 +1824,9 @@ define i32 @otherblock_latch_same_exit(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %cmp.epil = icmp eq i64 %n, 42
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa, label %latch.epil
 ; EPILOG-BLOCK:       latch.epil:
-; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.unr
+; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.epil.init
 ; EPILOG-BLOCK-NEXT:    %5 = load i32, ptr %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %5, %sum.02.unr
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %5, %sum.02.epil.init
 ; EPILOG-BLOCK-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG-BLOCK:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph1.ph = phi i32 [ 2, %for.exiting_block ], [ 2, %for.exiting_block.1 ]
@@ -2085,7 +2079,7 @@ define i32 @otherblock_latch_same_exit2(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-NEXT:    %xtraiter = and i64 %0, 7
 ; EPILOG-NEXT:    %2 = icmp ult i64 %1, 7
-; EPILOG-NEXT:    br i1 %2, label %latchExit.unr-lcssa, label %entry.new
+; EPILOG-NEXT:    br i1 %2, label %header.epil.preheader, label %entry.new
 ; EPILOG:       entry.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-NEXT:    br label %header
@@ -2167,28 +2161,27 @@ define i32 @otherblock_latch_same_exit2(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv, 8
 ; EPILOG-NEXT:    %niter.next.7 = add i64 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latchExit.unr-lcssa.loopexit, label %header
-; EPILOG:       latchExit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %result.ph.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %latch.7 ]
-; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    br label %latchExit.unr-lcssa
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latchExit.unr-lcssa, label %header
 ; EPILOG:       latchExit.unr-lcssa:
-; EPILOG-NEXT:    %result.ph = phi i32 [ poison, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %result.ph = phi i32 [ %add.7, %latch.7 ]
+; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ %indvars.iv.next.7, %latch.7 ]
+; EPILOG-NEXT:    %sum.02.unr = phi i32 [ %add.7, %latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
 ; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    %indvars.iv.epil.init = phi i64 [ 0, %entry ], [ %indvars.iv.unr, %latchExit.unr-lcssa ]
+; EPILOG-NEXT:    %sum.02.epil.init = phi i32 [ 0, %entry ], [ %sum.02.unr, %latchExit.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod2 = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod2)
 ; EPILOG-NEXT:    br label %header.epil
 ; EPILOG:       header.epil:
-; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.epil.init, %header.epil.preheader ]
+; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.epil.init, %header.epil.preheader ]
 ; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %header.epil.preheader ], [ %epil.iter.next, %latch.epil ]
-; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit2, label %for.exiting_block.epil
+; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit3, label %for.exiting_block.epil
 ; EPILOG:       for.exiting_block.epil:
 ; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
-; EPILOG-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa.loopexit3, label %latch.epil
+; EPILOG-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa.loopexit4, label %latch.epil
 ; EPILOG:       latch.epil:
 ; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.epil
 ; EPILOG-NEXT:    %11 = load i32, ptr %arrayidx.epil, align 4
@@ -2197,22 +2190,22 @@ define i32 @otherblock_latch_same_exit2(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
 ; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
 ; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit3, !llvm.loop !6
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit4, !llvm.loop !6
 ; EPILOG:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-NEXT:    %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %for.exiting_block.2 ], [ %add.2, %for.exiting_block.3 ], [ %add.3, %for.exiting_block.4 ], [ %add.4, %for.exiting_block.5 ], [ %add.5, %for.exiting_block.6 ], [ %add.6, %for.exiting_block.7 ]
 ; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
-; EPILOG:       latchExit.epilog-lcssa.loopexit3:
-; EPILOG-NEXT:    %result.ph1.ph4 = phi i32 [ %sum.02.epil, %for.exiting_block.epil ], [ %add.epil, %latch.epil ]
+; EPILOG:       latchExit.epilog-lcssa.loopexit4:
+; EPILOG-NEXT:    %result.ph1.ph5 = phi i32 [ %sum.02.epil, %for.exiting_block.epil ], [ %add.epil, %latch.epil ]
 ; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG:       latchExit.epilog-lcssa:
-; EPILOG-NEXT:    %result.ph1 = phi i32 [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %result.ph1.ph4, %latchExit.epilog-lcssa.loopexit3 ]
+; EPILOG-NEXT:    %result.ph1 = phi i32 [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %result.ph1.ph5, %latchExit.epilog-lcssa.loopexit4 ]
 ; EPILOG-NEXT:    br label %latchExit
 ; EPILOG:       latchExit:
 ; EPILOG-NEXT:    %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
 ; EPILOG-NEXT:    ret i32 %result
 ; EPILOG:       for.exit2.loopexit:
 ; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2.loopexit2:
+; EPILOG:       for.exit2.loopexit3:
 ; EPILOG-NEXT:    br label %for.exit2
 ; EPILOG:       for.exit2:
 ; EPILOG-NEXT:    ret i32 42
@@ -2223,7 +2216,7 @@ define i32 @otherblock_latch_same_exit2(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
 ; EPILOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %2, label %latchExit.unr-lcssa, label %entry.new
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %header.epil.preheader, label %entry.new
 ; EPILOG-BLOCK:       entry.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %header
@@ -2251,19 +2244,18 @@ define i32 @otherblock_latch_same_exit2(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv, 2
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !6
-; EPILOG-BLOCK:       latchExit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %sum.02.unr.ph = phi i32 [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    br label %latchExit.unr-lcssa
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa, label %header, !llvm.loop !6
 ; EPILOG-BLOCK:       latchExit.unr-lcssa:
-; EPILOG-BLOCK-NEXT:    %result.ph = phi i32 [ poison, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %result.ph = phi i32 [ %add.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ %indvars.iv.next.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ %add.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
 ; EPILOG-BLOCK:       header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %indvars.iv.epil.init = phi i64 [ 0, %entry ], [ %indvars.iv.unr, %latchExit.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %sum.02.epil.init = phi i32 [ 0, %entry ], [ %sum.02.unr, %latchExit.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %lcmp.mod2 = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod2)
 ; EPILOG-BLOCK-NEXT:    br label %header.epil
 ; EPILOG-BLOCK:       header.epil:
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2, label %for.exiting_block.epil
@@ -2271,15 +2263,15 @@ define i32 @otherblock_latch_same_exit2(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %cmp.epil = icmp eq i64 %n, 42
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa, label %latch.epil
 ; EPILOG-BLOCK:       latch.epil:
-; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.unr
+; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.epil.init
 ; EPILOG-BLOCK-NEXT:    %5 = load i32, ptr %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %5, %sum.02.unr
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %5, %sum.02.epil.init
 ; EPILOG-BLOCK-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG-BLOCK:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG-BLOCK:       latchExit.epilog-lcssa:
-; EPILOG-BLOCK-NEXT:    %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %for.exiting_block.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.epil.init, %for.exiting_block.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ]
 ; EPILOG-BLOCK-NEXT:    br label %latchExit
 ; EPILOG-BLOCK:       latchExit:
 ; EPILOG-BLOCK-NEXT:    %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
@@ -2527,7 +2519,7 @@ define i32 @otherblock_latch_same_exit3(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-NEXT:    %xtraiter = and i64 %0, 7
 ; EPILOG-NEXT:    %2 = icmp ult i64 %1, 7
-; EPILOG-NEXT:    br i1 %2, label %latchExit.unr-lcssa, label %entry.new
+; EPILOG-NEXT:    br i1 %2, label %header.epil.preheader, label %entry.new
 ; EPILOG:       entry.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-NEXT:    br label %header
@@ -2609,52 +2601,51 @@ define i32 @otherblock_latch_same_exit3(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv, 8
 ; EPILOG-NEXT:    %niter.next.7 = add i64 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latchExit.unr-lcssa.loopexit, label %header
-; EPILOG:       latchExit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %result.ph.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %latch.7 ]
-; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    br label %latchExit.unr-lcssa
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latchExit.unr-lcssa, label %header
 ; EPILOG:       latchExit.unr-lcssa:
-; EPILOG-NEXT:    %result.ph = phi i32 [ poison, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %result.ph = phi i32 [ %add.7, %latch.7 ]
+; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ %indvars.iv.next.7, %latch.7 ]
+; EPILOG-NEXT:    %sum.02.unr = phi i32 [ %add.7, %latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
 ; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    %indvars.iv.epil.init = phi i64 [ 0, %entry ], [ %indvars.iv.unr, %latchExit.unr-lcssa ]
+; EPILOG-NEXT:    %sum.02.epil.init = phi i32 [ 0, %entry ], [ %sum.02.unr, %latchExit.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod2 = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod2)
 ; EPILOG-NEXT:    br label %header.epil
 ; EPILOG:       header.epil:
-; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.epil.init, %header.epil.preheader ]
+; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.epil.init, %header.epil.preheader ]
 ; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %header.epil.preheader ], [ %epil.iter.next, %latch.epil ]
-; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit2, label %for.exiting_block.epil
+; EPILOG-NEXT:    br i1 %cond, label %for.exit2.loopexit3, label %for.exiting_block.epil
 ; EPILOG:       for.exiting_block.epil:
 ; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.epil
 ; EPILOG-NEXT:    %11 = load i32, ptr %arrayidx.epil, align 4
 ; EPILOG-NEXT:    %add.epil = add nsw i32 %11, %sum.02.epil
 ; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
-; EPILOG-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa.loopexit3, label %latch.epil
+; EPILOG-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa.loopexit4, label %latch.epil
 ; EPILOG:       latch.epil:
 ; EPILOG-NEXT:    %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
 ; EPILOG-NEXT:    %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
 ; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
 ; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit3, !llvm.loop !7
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit4, !llvm.loop !7
 ; EPILOG:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-NEXT:    %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ], [ %add.1, %for.exiting_block.2 ], [ %add.2, %for.exiting_block.3 ], [ %add.3, %for.exiting_block.4 ], [ %add.4, %for.exiting_block.5 ], [ %add.5, %for.exiting_block.6 ], [ %add.6, %for.exiting_block.7 ]
 ; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
-; EPILOG:       latchExit.epilog-lcssa.loopexit3:
-; EPILOG-NEXT:    %result.ph1.ph4 = phi i32 [ %sum.02.epil, %for.exiting_block.epil ], [ %add.epil, %latch.epil ]
+; EPILOG:       latchExit.epilog-lcssa.loopexit4:
+; EPILOG-NEXT:    %result.ph1.ph5 = phi i32 [ %sum.02.epil, %for.exiting_block.epil ], [ %add.epil, %latch.epil ]
 ; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG:       latchExit.epilog-lcssa:
-; EPILOG-NEXT:    %result.ph1 = phi i32 [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %result.ph1.ph4, %latchExit.epilog-lcssa.loopexit3 ]
+; EPILOG-NEXT:    %result.ph1 = phi i32 [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %result.ph1.ph5, %latchExit.epilog-lcssa.loopexit4 ]
 ; EPILOG-NEXT:    br label %latchExit
 ; EPILOG:       latchExit:
 ; EPILOG-NEXT:    %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
 ; EPILOG-NEXT:    ret i32 %result
 ; EPILOG:       for.exit2.loopexit:
 ; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2.loopexit2:
+; EPILOG:       for.exit2.loopexit3:
 ; EPILOG-NEXT:    br label %for.exit2
 ; EPILOG:       for.exit2:
 ; EPILOG-NEXT:    ret i32 42
@@ -2665,7 +2656,7 @@ define i32 @otherblock_latch_same_exit3(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
 ; EPILOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %2, label %latchExit.unr-lcssa, label %entry.new
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %header.epil.preheader, label %entry.new
 ; EPILOG-BLOCK:       entry.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %header
@@ -2693,26 +2684,25 @@ define i32 @otherblock_latch_same_exit3(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv, 2
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa.loopexit, label %header, !llvm.loop !7
-; EPILOG-BLOCK:       latchExit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    %result.ph.ph = phi i32 [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %sum.02.unr.ph = phi i32 [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    br label %latchExit.unr-lcssa
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latchExit.unr-lcssa, label %header, !llvm.loop !7
 ; EPILOG-BLOCK:       latchExit.unr-lcssa:
-; EPILOG-BLOCK-NEXT:    %result.ph = phi i32 [ poison, %entry ], [ %result.ph.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %result.ph = phi i32 [ %add.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ %indvars.iv.next.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ %add.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
 ; EPILOG-BLOCK:       header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %indvars.iv.epil.init = phi i64 [ 0, %entry ], [ %indvars.iv.unr, %latchExit.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %sum.02.epil.init = phi i32 [ 0, %entry ], [ %sum.02.unr, %latchExit.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %lcmp.mod2 = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod2)
 ; EPILOG-BLOCK-NEXT:    br label %header.epil
 ; EPILOG-BLOCK:       header.epil:
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %for.exit2, label %for.exiting_block.epil
 ; EPILOG-BLOCK:       for.exiting_block.epil:
-; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.unr
+; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.epil.init
 ; EPILOG-BLOCK-NEXT:    %5 = load i32, ptr %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %5, %sum.02.unr
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %5, %sum.02.epil.init
 ; EPILOG-BLOCK-NEXT:    %cmp.epil = icmp eq i64 %n, 42
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %latchExit.epilog-lcssa, label %latch.epil
 ; EPILOG-BLOCK:       latch.epil:
@@ -2721,7 +2711,7 @@ define i32 @otherblock_latch_same_exit3(ptr nocapture %a, i64 %n, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG-BLOCK:       latchExit.epilog-lcssa:
-; EPILOG-BLOCK-NEXT:    %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %for.exiting_block.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.epil.init, %for.exiting_block.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ]
 ; EPILOG-BLOCK-NEXT:    br label %latchExit
 ; EPILOG-BLOCK:       latchExit:
 ; EPILOG-BLOCK-NEXT:    %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
@@ -3013,7 +3003,7 @@ define void @unique_exit(i32 %N, i32 %M) {
 ; EPILOG-NEXT:    %1 = add i32 %0, -1
 ; EPILOG-NEXT:    %xtraiter = and i32 %0, 7
 ; EPILOG-NEXT:    %2 = icmp ult i32 %1, 7
-; EPILOG-NEXT:    br i1 %2, label %latchExit.unr-lcssa, label %preheader.new
+; EPILOG-NEXT:    br i1 %2, label %header.epil.preheader, label %preheader.new
 ; EPILOG:       preheader.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i32 %0, %xtraiter
 ; EPILOG-NEXT:    br label %header
@@ -3054,37 +3044,36 @@ define void @unique_exit(i32 %N, i32 %M) {
 ; EPILOG:       latch.7:
 ; EPILOG-NEXT:    %niter.next.7 = add nuw i32 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i32 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %header, label %latchExit.unr-lcssa.loopexit
-; EPILOG:       latchExit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %i2.ph.ph.ph = phi i32 [ -1, %latch.7 ]
-; EPILOG-NEXT:    %i4.unr.ph = phi i32 [ %inc.7, %latch.7 ]
-; EPILOG-NEXT:    br label %latchExit.unr-lcssa
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %header, label %latchExit.unr-lcssa
 ; EPILOG:       latchExit.unr-lcssa:
-; EPILOG-NEXT:    %i2.ph.ph = phi i32 [ poison, %preheader ], [ %i2.ph.ph.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %i4.unr = phi i32 [ 0, %preheader ], [ %i4.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %i2.ph.ph = phi i32 [ -1, %latch.7 ]
+; EPILOG-NEXT:    %i4.unr = phi i32 [ %inc.7, %latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i32 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
 ; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    %i4.epil.init = phi i32 [ 0, %preheader ], [ %i4.unr, %latchExit.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod2 = icmp ne i32 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod2)
 ; EPILOG-NEXT:    br label %header.epil
 ; EPILOG:       header.epil:
-; EPILOG-NEXT:    %i4.epil = phi i32 [ %inc.epil, %latch.epil ], [ %i4.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %i4.epil = phi i32 [ %inc.epil, %latch.epil ], [ %i4.epil.init, %header.epil.preheader ]
 ; EPILOG-NEXT:    %epil.iter = phi i32 [ 0, %header.epil.preheader ], [ %epil.iter.next, %latch.epil ]
 ; EPILOG-NEXT:    %inc.epil = add nuw i32 %i4.epil, 1
 ; EPILOG-NEXT:    %cmp1.epil = icmp ult i32 %inc.epil, %N
-; EPILOG-NEXT:    br i1 %cmp1.epil, label %latch.epil, label %latchExit.epilog-lcssa.loopexit2
+; EPILOG-NEXT:    br i1 %cmp1.epil, label %latch.epil, label %latchExit.epilog-lcssa.loopexit3
 ; EPILOG:       latch.epil:
 ; EPILOG-NEXT:    %cmp.epil = icmp ult i32 %inc.epil, %M.shifted
 ; EPILOG-NEXT:    %epil.iter.next = add i32 %epil.iter, 1
 ; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i32 %epil.iter.next, %xtraiter
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit2, !llvm.loop !8
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %header.epil, label %latchExit.epilog-lcssa.loopexit3, !llvm.loop !8
 ; EPILOG:       latchExit.epilog-lcssa.loopexit:
 ; EPILOG-NEXT:    %i2.ph.ph1.ph = phi i32 [ %i4, %header ], [ %inc, %latch ], [ %inc.1, %latch.1 ], [ %inc.2, %latch.2 ], [ %inc.3, %latch.3 ], [ %inc.4, %latch.4 ], [ %inc.5, %latch.5 ], [ %inc.6, %latch.6 ]
 ; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
-; EPILOG:       latchExit.epilog-lcssa.loopexit2:
-; EPILOG-NEXT:    %i2.ph.ph1.ph3 = phi i32 [ %i4.epil, %header.epil ], [ -1, %latch.epil ]
+; EPILOG:       latchExit.epilog-lcssa.loopexit3:
+; EPILOG-NEXT:    %i2.ph.ph1.ph4 = phi i32 [ %i4.epil, %header.epil ], [ -1, %latch.epil ]
 ; EPILOG-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG:       latchExit.epilog-lcssa:
-; EPILOG-NEXT:    %i2.ph.ph1 = phi i32 [ %i2.ph.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %i2.ph.ph1.ph3, %latchExit.epilog-lcssa.loopexit2 ]
+; EPILOG-NEXT:    %i2.ph.ph1 = phi i32 [ %i2.ph.ph1.ph, %latchExit.epilog-lcssa.loopexit ], [ %i2.ph.ph1.ph4, %latchExit.epilog-lcssa.loopexit3 ]
 ; EPILOG-NEXT:    br label %latchExit
 ; EPILOG:       latchExit:
 ; EPILOG-NEXT:    %i2.ph = phi i32 [ %i2.ph.ph, %latchExit.unr-lcssa ], [ %i2.ph.ph1, %latchExit.epilog-lcssa ]
@@ -3098,7 +3087,7 @@ define void @unique_exit(i32 %N, i32 %M) {
 ; EPILOG-BLOCK-NEXT:    %1 = add i32 %0, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i32 %0, 1
 ; EPILOG-BLOCK-NEXT:    %2 = icmp ult i32 %1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %2, label %latchExit.unr-lcssa, label %preheader.new
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %header.epil.preheader, label %preheader.new
 ; EPILOG-BLOCK:       preheader.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i32 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %header
@@ -3115,20 +3104,19 @@ define void @unique_exit(i32 %N, i32 %M) {
 ; EPILOG-BLOCK:       latch.1:
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add nuw i32 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i32 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %header, label %latchExit.unr-lcssa.loopexit, !llvm.loop !8
-; EPILOG-BLOCK:       latchExit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    %i2.ph.ph.ph = phi i32 [ -1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %i4.unr.ph = phi i32 [ %inc.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    br label %latchExit.unr-lcssa
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %header, label %latchExit.unr-lcssa, !llvm.loop !8
 ; EPILOG-BLOCK:       latchExit.unr-lcssa:
-; EPILOG-BLOCK-NEXT:    %i2.ph.ph = phi i32 [ poison, %preheader ], [ %i2.ph.ph.ph, %latchExit.unr-lcssa.loopexit ]
-; EPILOG-BLOCK-NEXT:    %i4.unr = phi i32 [ 0, %preheader ], [ %i4.unr.ph, %latchExit.unr-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %i2.ph.ph = phi i32 [ -1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %i4.unr = phi i32 [ %inc.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i32 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchExit
 ; EPILOG-BLOCK:       header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %i4.epil.init = phi i32 [ 0, %preheader ], [ %i4.unr, %latchExit.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %lcmp.mod2 = icmp ne i32 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod2)
 ; EPILOG-BLOCK-NEXT:    br label %header.epil
 ; EPILOG-BLOCK:       header.epil:
-; EPILOG-BLOCK-NEXT:    %inc.epil = add nuw i32 %i4.unr, 1
+; EPILOG-BLOCK-NEXT:    %inc.epil = add nuw i32 %i4.epil.init, 1
 ; EPILOG-BLOCK-NEXT:    %cmp1.epil = icmp ult i32 %inc.epil, %N
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp1.epil, label %latch.epil, label %latchExit.epilog-lcssa
 ; EPILOG-BLOCK:       latch.epil:
@@ -3137,7 +3125,7 @@ define void @unique_exit(i32 %N, i32 %M) {
 ; EPILOG-BLOCK-NEXT:    %i2.ph.ph1.ph = phi i32 [ %i4, %header ], [ %inc, %latch ]
 ; EPILOG-BLOCK-NEXT:    br label %latchExit.epilog-lcssa
 ; EPILOG-BLOCK:       latchExit.epilog-lcssa:
-; EPILOG-BLOCK-NEXT:    %i2.ph.ph1 = phi i32 [ -1, %latch.epil ], [ %i4.unr, %header.epil ], [ %i2.ph.ph1.ph, %latchExit.epilog-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %i2.ph.ph1 = phi i32 [ -1, %latch.epil ], [ %i4.epil.init, %header.epil ], [ %i2.ph.ph1.ph, %latchExit.epilog-lcssa.loopexit ]
 ; EPILOG-BLOCK-NEXT:    br label %latchExit
 ; EPILOG-BLOCK:       latchExit:
 ; EPILOG-BLOCK-NEXT:    %i2.ph = phi i32 [ %i2.ph.ph, %latchExit.unr-lcssa ], [ %i2.ph.ph1, %latchExit.epilog-lcssa ]
@@ -3300,7 +3288,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-NEXT:    %xtraiter = and i64 %0, 7
 ; EPILOG-NEXT:    %2 = icmp ult i64 %1, 7
-; EPILOG-NEXT:    br i1 %2, label %latchexit.unr-lcssa, label %entry.new
+; EPILOG-NEXT:    br i1 %2, label %loop_header.epil.preheader, label %entry.new
 ; EPILOG:       entry.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-NEXT:    br label %loop_header
@@ -3390,39 +3378,38 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG-NEXT:    %sum.next.7 = add i64 %sum.next.6, %add
 ; EPILOG-NEXT:    %niter.next.7 = add i64 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %latchexit.unr-lcssa.loopexit
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %latchexit.unr-lcssa
 ; EPILOG:       exit1.loopexit:
 ; EPILOG-NEXT:    %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ], [ %ivy.2, %loop_exiting.2 ], [ %ivy.3, %loop_exiting.3 ], [ %ivy.3, %loop_exiting.3 ], [ %ivy.4, %loop_exiting.4 ], [ %ivy.4, %loop_exiting.4 ], [ %ivy.5, %loop_exiting.5 ], [ %ivy.5, %loop_exiting.5 ], [ %ivy.6, %loop_exiting.6 ], [ %ivy.6, %loop_exiting.6 ], [ %ivy.7, %loop_exiting.7 ], [ %ivy.7, %loop_exiting.7 ]
 ; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1.loopexit2:
-; EPILOG-NEXT:    %result.ph3 = phi i64 [ %ivy.epil, %loop_exiting.epil ], [ %ivy.epil, %loop_exiting.epil ]
+; EPILOG:       exit1.loopexit3:
+; EPILOG-NEXT:    %result.ph4 = phi i64 [ %ivy.epil, %loop_exiting.epil ], [ %ivy.epil, %loop_exiting.epil ]
 ; EPILOG-NEXT:    br label %exit1
 ; EPILOG:       exit1:
-; EPILOG-NEXT:    %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %result.ph3, %exit1.loopexit2 ]
+; EPILOG-NEXT:    %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %result.ph4, %exit1.loopexit3 ]
 ; EPILOG-NEXT:    ret i64 %result
-; EPILOG:       latchexit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %sum.next.lcssa.ph.ph = phi i64 [ %sum.next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    %sum.unr.ph = phi i64 [ %sum.next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    br label %latchexit.unr-lcssa
 ; EPILOG:       latchexit.unr-lcssa:
-; EPILOG-NEXT:    %sum.next.lcssa.ph = phi i64 [ poison, %entry ], [ %sum.next.lcssa.ph.ph, %latchexit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %latchexit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %sum.unr = phi i64 [ 0, %entry ], [ %sum.unr.ph, %latchexit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %sum.next.lcssa.ph = phi i64 [ %sum.next.7, %loop_latch.7 ]
+; EPILOG-NEXT:    %iv.unr = phi i64 [ %iv_next.7, %loop_latch.7 ]
+; EPILOG-NEXT:    %sum.unr = phi i64 [ %sum.next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %latchexit
 ; EPILOG:       loop_header.epil.preheader:
+; EPILOG-NEXT:    %iv.epil.init = phi i64 [ 0, %entry ], [ %iv.unr, %latchexit.unr-lcssa ]
+; EPILOG-NEXT:    %sum.epil.init = phi i64 [ 0, %entry ], [ %sum.unr, %latchexit.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod2 = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod2)
 ; EPILOG-NEXT:    br label %loop_header.epil
 ; EPILOG:       loop_header.epil:
-; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
-; EPILOG-NEXT:    %sum.epil = phi i64 [ %sum.unr, %loop_header.epil.preheader ], [ %sum.next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.epil.init, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %sum.epil = phi i64 [ %sum.epil.init, %loop_header.epil.preheader ], [ %sum.next.epil, %loop_latch.epil ]
 ; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %loop_header.epil.preheader ], [ %epil.iter.next, %loop_latch.epil ]
 ; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %loop_exiting.epil
 ; EPILOG:       loop_exiting.epil:
 ; EPILOG-NEXT:    %ivy.epil = add i64 %iv.epil, %add
 ; EPILOG-NEXT:    switch i64 %sum.epil, label %loop_latch.epil [
-; EPILOG-NEXT:      i64 24, label %exit1.loopexit2
-; EPILOG-NEXT:      i64 42, label %exit1.loopexit2
+; EPILOG-NEXT:      i64 24, label %exit1.loopexit3
+; EPILOG-NEXT:      i64 42, label %exit1.loopexit3
 ; EPILOG-NEXT:    ]
 ; EPILOG:       loop_latch.epil:
 ; EPILOG-NEXT:    %iv_next.epil = add nuw nsw i64 %iv.epil, 1
@@ -3444,7 +3431,7 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
 ; EPILOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %2, label %latchexit.unr-lcssa, label %entry.new
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %loop_header.epil.preheader, label %entry.new
 ; EPILOG-BLOCK:       entry.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %loop_header
@@ -3474,36 +3461,35 @@ define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %sum.next.1 = add i64 %sum.next, %add
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %latchexit.unr-lcssa.loopexit, !llvm.loop !9
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %latchexit.unr-lcssa, !llvm.loop !9
 ; EPILOG-BLOCK:       exit1.loopexit:
 ; EPILOG-BLOCK-NEXT:    %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %exit1
 ; EPILOG-BLOCK:       exit1:
 ; EPILOG-BLOCK-NEXT:    %result = phi i64 [ %ivy.epil, %loop_exiting.epil ], [ %ivy.epil, %loop_exiting.epil ], [ %result.ph, %exit1.loopexit ]
 ; EPILOG-BLOCK-NEXT:    ret i64 %result
-; EPILOG-BLOCK:       latchexit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    %sum.next.lcssa.ph.ph = phi i64 [ %sum.next.1, %loop_latch.1 ]
-; EPILOG-BLOCK-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.1, %loop_latch.1 ]
-; EPILOG-BLOCK-NEXT:    %sum.unr.ph = phi i64 [ %sum.next.1, %loop_latch.1 ]
-; EPILOG-BLOCK-NEXT:    br label %latchexit.unr-lcssa
 ; EPILOG-BLOCK:       latchexit.unr-lcssa:
-; EPILOG-BLOCK-NEXT:    %sum.next.lcssa.ph = phi i64 [ poison, %entry ], [ %sum.next.lcssa.ph.ph, %latchexit.unr-lcssa.loopexit ]
-; EPILOG-BLOCK-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %latchexit.unr-lcssa.loopexit ]
-; EPILOG-BLOCK-NEXT:    %sum.unr = phi i64 [ 0, %entry ], [ %sum.unr.ph, %latchexit.unr-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %sum.next.lcssa.ph = phi i64 [ %sum.next.1, %loop_latch.1 ]
+; EPILOG-BLOCK-NEXT:    %iv.unr = phi i64 [ %iv_next.1, %loop_latch.1 ]
+; EPILOG-BLOCK-NEXT:    %sum.unr = phi i64 [ %sum.next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %latchexit
 ; EPILOG-BLOCK:       loop_header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %iv.epil.init = phi i64 [ 0, %entry ], [ %iv.unr, %latchexit.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %sum.epil.init = phi i64 [ 0, %entry ], [ %sum.unr, %latchexit.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %lcmp.mod2 = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod2)
 ; EPILOG-BLOCK-NEXT:    br label %loop_header.epil
 ; EPILOG-BLOCK:       loop_header.epil:
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.epil, label %loop_exiting.epil
 ; EPILOG-BLOCK:       loop_exiting.epil:
-; EPILOG-BLOCK-NEXT:    %ivy.epil = add i64 %iv.unr, %add
-; EPILOG-BLOCK-NEXT:    switch i64 %sum.unr, label %loop_latch.epil [
+; EPILOG-BLOCK-NEXT:    %ivy.epil = add i64 %iv.epil.init, %add
+; EPILOG-BLOCK-NEXT:    switch i64 %sum.epil.init, label %loop_latch.epil [
 ; EPILOG-BLOCK-NEXT:      i64 24, label %exit1
 ; EPILOG-BLOCK-NEXT:      i64 42, label %exit1
 ; EPILOG-BLOCK-NEXT:    ]
 ; EPILOG-BLOCK:       loop_latch.epil:
-; EPILOG-BLOCK-NEXT:    %sum.next.epil = add i64 %sum.unr, %add
+; EPILOG-BLOCK-NEXT:    %sum.next.epil = add i64 %sum.epil.init, %add
 ; EPILOG-BLOCK-NEXT:    br label %latchexit
 ; EPILOG-BLOCK:       latchexit:
 ; EPILOG-BLOCK-NEXT:    %sum.next.lcssa = phi i64 [ %sum.next.lcssa.ph, %latchexit.unr-lcssa ], [ %sum.next.epil, %loop_latch.epil ]
@@ -3752,7 +3738,7 @@ define i32 @test6(ptr nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-NEXT:    %xtraiter = and i64 %0, 7
 ; EPILOG-NEXT:    %2 = icmp ult i64 %1, 7
-; EPILOG-NEXT:    br i1 %2, label %latch_exit.unr-lcssa, label %entry.new
+; EPILOG-NEXT:    br i1 %2, label %header.epil.preheader, label %entry.new
 ; EPILOG:       entry.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-NEXT:    br label %header
@@ -3834,28 +3820,27 @@ define i32 @test6(ptr nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-NEXT:    %indvars.iv.next.7 = add i64 %indvars.iv, 8
 ; EPILOG-NEXT:    %niter.next.7 = add i64 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp eq i64 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latch_exit.unr-lcssa.loopexit, label %header
-; EPILOG:       latch_exit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %sum.0.lcssa.ph.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.7, %latch.7 ]
-; EPILOG-NEXT:    %sum.02.unr.ph = phi i32 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    br label %latch_exit.unr-lcssa
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %latch_exit.unr-lcssa, label %header
 ; EPILOG:       latch_exit.unr-lcssa:
-; EPILOG-NEXT:    %sum.0.lcssa.ph = phi i32 [ poison, %entry ], [ %sum.0.lcssa.ph.ph, %latch_exit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latch_exit.unr-lcssa.loopexit ]
-; EPILOG-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latch_exit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %sum.0.lcssa.ph = phi i32 [ %add.7, %latch.7 ]
+; EPILOG-NEXT:    %indvars.iv.unr = phi i64 [ %indvars.iv.next.7, %latch.7 ]
+; EPILOG-NEXT:    %sum.02.unr = phi i32 [ %add.7, %latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latch_exit
 ; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    %indvars.iv.epil.init = phi i64 [ 0, %entry ], [ %indvars.iv.unr, %latch_exit.unr-lcssa ]
+; EPILOG-NEXT:    %sum.02.epil.init = phi i32 [ 0, %entry ], [ %sum.02.unr, %latch_exit.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod2 = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod2)
 ; EPILOG-NEXT:    br label %header.epil
 ; EPILOG:       header.epil:
-; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %header.epil.preheader ]
+; EPILOG-NEXT:    %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %latch.epil ], [ %indvars.iv.epil.init, %header.epil.preheader ]
+; EPILOG-NEXT:    %sum.02.epil = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.epil.init, %header.epil.preheader ]
 ; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %header.epil.preheader ], [ %epil.iter.next, %latch.epil ]
-; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit2, label %for.exiting_block.epil
+; EPILOG-NEXT:    br i1 false, label %for.exit2.loopexit3, label %for.exiting_block.epil
 ; EPILOG:       for.exiting_block.epil:
 ; EPILOG-NEXT:    %cmp.epil = icmp eq i64 %n, 42
-; EPILOG-NEXT:    br i1 %cmp.epil, label %for.exit2.loopexit2, label %latch.epil
+; EPILOG-NEXT:    br i1 %cmp.epil, label %for.exit2.loopexit3, label %latch.epil
 ; EPILOG:       latch.epil:
 ; EPILOG-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.epil
 ; EPILOG-NEXT:    %load.epil = load i32, ptr %arrayidx.epil, align 4
@@ -3874,11 +3859,11 @@ define i32 @test6(ptr nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG:       for.exit2.loopexit:
 ; EPILOG-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ], [ 42, %for.exiting_block.3 ], [ %add.3, %latch.3 ], [ 42, %for.exiting_block.4 ], [ %add.4, %latch.4 ], [ 42, %for.exiting_block.5 ], [ %add.5, %latch.5 ], [ 42, %for.exiting_block.6 ], [ %add.6, %latch.6 ], [ 42, %for.exiting_block.7 ]
 ; EPILOG-NEXT:    br label %for.exit2
-; EPILOG:       for.exit2.loopexit2:
-; EPILOG-NEXT:    %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
+; EPILOG:       for.exit2.loopexit3:
+; EPILOG-NEXT:    %retval.ph4 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
 ; EPILOG-NEXT:    br label %for.exit2
 ; EPILOG:       for.exit2:
-; EPILOG-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
+; EPILOG-NEXT:    %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph4, %for.exit2.loopexit3 ]
 ; EPILOG-NEXT:    %addx = add i32 %retval, %x
 ; EPILOG-NEXT:    br i1 %cond, label %exit_true, label %exit_false
 ; EPILOG:       exit_true:
@@ -3892,7 +3877,7 @@ define i32 @test6(ptr nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-BLOCK-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
 ; EPILOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %2, label %latch_exit.unr-lcssa, label %entry.new
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %header.epil.preheader, label %entry.new
 ; EPILOG-BLOCK:       entry.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %header
@@ -3920,19 +3905,18 @@ define i32 @test6(ptr nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-BLOCK-NEXT:    %indvars.iv.next.1 = add i64 %indvars.iv, 2
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latch_exit.unr-lcssa.loopexit, label %header, !llvm.loop !10
-; EPILOG-BLOCK:       latch_exit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    %sum.0.lcssa.ph.ph = phi i32 [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    %sum.02.unr.ph = phi i32 [ %add.1, %latch.1 ]
-; EPILOG-BLOCK-NEXT:    br label %latch_exit.unr-lcssa
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %latch_exit.unr-lcssa, label %header, !llvm.loop !10
 ; EPILOG-BLOCK:       latch_exit.unr-lcssa:
-; EPILOG-BLOCK-NEXT:    %sum.0.lcssa.ph = phi i32 [ poison, %entry ], [ %sum.0.lcssa.ph.ph, %latch_exit.unr-lcssa.loopexit ]
-; EPILOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.unr.ph, %latch_exit.unr-lcssa.loopexit ]
-; EPILOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ 0, %entry ], [ %sum.02.unr.ph, %latch_exit.unr-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %sum.0.lcssa.ph = phi i32 [ %add.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %indvars.iv.unr = phi i64 [ %indvars.iv.next.1, %latch.1 ]
+; EPILOG-BLOCK-NEXT:    %sum.02.unr = phi i32 [ %add.1, %latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latch_exit
 ; EPILOG-BLOCK:       header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %indvars.iv.epil.init = phi i64 [ 0, %entry ], [ %indvars.iv.unr, %latch_exit.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %sum.02.epil.init = phi i32 [ 0, %entry ], [ %sum.02.unr, %latch_exit.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %lcmp.mod2 = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod2)
 ; EPILOG-BLOCK-NEXT:    br label %header.epil
 ; EPILOG-BLOCK:       header.epil:
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %for.exit2, label %for.exiting_block.epil
@@ -3940,9 +3924,9 @@ define i32 @test6(ptr nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-BLOCK-NEXT:    %cmp.epil = icmp eq i64 %n, 42
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp.epil, label %for.exit2, label %latch.epil
 ; EPILOG-BLOCK:       latch.epil:
-; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.unr
+; EPILOG-BLOCK-NEXT:    %arrayidx.epil = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.epil.init
 ; EPILOG-BLOCK-NEXT:    %load.epil = load i32, ptr %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %load.epil, %sum.02.unr
+; EPILOG-BLOCK-NEXT:    %add.epil = add nsw i32 %load.epil, %sum.02.epil.init
 ; EPILOG-BLOCK-NEXT:    br label %latch_exit
 ; EPILOG-BLOCK:       latch_exit:
 ; EPILOG-BLOCK-NEXT:    %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ], [ %add.epil, %latch.epil ]
@@ -3951,7 +3935,7 @@ define i32 @test6(ptr nocapture %a, i64 %n, i1 %cond, i32 %x) {
 ; EPILOG-BLOCK-NEXT:    %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %for.exit2
 ; EPILOG-BLOCK:       for.exit2:
-; EPILOG-BLOCK-NEXT:    %retval = phi i32 [ %sum.02.unr, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ]
+; EPILOG-BLOCK-NEXT:    %retval = phi i32 [ %sum.02.epil.init, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ]
 ; EPILOG-BLOCK-NEXT:    %addx = add i32 %retval, %x
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %exit_true, label %exit_false
 ; EPILOG-BLOCK:       exit_true:
@@ -4213,7 +4197,7 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; EPILOG-NEXT:    %2 = add i64 %1, -1
 ; EPILOG-NEXT:    %xtraiter = and i64 %1, 7
 ; EPILOG-NEXT:    %3 = icmp ult i64 %2, 7
-; EPILOG-NEXT:    br i1 %3, label %latchexit.unr-lcssa, label %preheader.new
+; EPILOG-NEXT:    br i1 %3, label %header.epil.preheader, label %preheader.new
 ; EPILOG:       preheader.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i64 %1, %xtraiter
 ; EPILOG-NEXT:    br label %header
@@ -4239,20 +4223,20 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; EPILOG-NEXT:    %add.7 = add nuw nsw i64 %i6, 8
 ; EPILOG-NEXT:    %niter.next.7 = add i64 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %header, label %latchexit.unr-lcssa.loopexit
-; EPILOG:       latchexit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %i6.unr.ph = phi i64 [ %add.7, %latch.7 ]
-; EPILOG-NEXT:    br label %latchexit.unr-lcssa
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %header, label %latchexit.unr-lcssa
 ; EPILOG:       latchexit.unr-lcssa:
-; EPILOG-NEXT:    %i6.unr = phi i64 [ 1, %preheader ], [ %i6.unr.ph, %latchexit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %i6.unr = phi i64 [ %add.7, %latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchexit
 ; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    %i6.epil.init = phi i64 [ 1, %preheader ], [ %i6.unr, %latchexit.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-NEXT:    br label %header.epil
 ; EPILOG:       header.epil:
-; EPILOG-NEXT:    %i6.epil = phi i64 [ %i6.unr, %header.epil.preheader ], [ %add.epil, %latch.epil ]
+; EPILOG-NEXT:    %i6.epil = phi i64 [ %i6.epil.init, %header.epil.preheader ], [ %add.epil, %latch.epil ]
 ; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %header.epil.preheader ], [ %epil.iter.next, %latch.epil ]
-; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit1, label %latch.epil
+; EPILOG-NEXT:    br i1 false, label %loopexit1.loopexit2, label %latch.epil
 ; EPILOG:       latch.epil:
 ; EPILOG-NEXT:    %add.epil = add nuw nsw i64 %i6.epil, 1
 ; EPILOG-NEXT:    %i9.epil = icmp slt i64 %add.epil, %sext
@@ -4268,11 +4252,11 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; EPILOG:       loopexit1.loopexit:
 ; EPILOG-NEXT:    %sext3.ph = phi i32 [ %shft, %header ], [ %shft, %latch ], [ %shft, %latch.1 ], [ %shft, %latch.2 ], [ %shft, %latch.3 ], [ %shft, %latch.4 ], [ %shft, %latch.5 ], [ %shft, %latch.6 ]
 ; EPILOG-NEXT:    br label %loopexit1
-; EPILOG:       loopexit1.loopexit1:
-; EPILOG-NEXT:    %sext3.ph2 = phi i32 [ %shft, %header.epil ]
+; EPILOG:       loopexit1.loopexit2:
+; EPILOG-NEXT:    %sext3.ph3 = phi i32 [ %shft, %header.epil ]
 ; EPILOG-NEXT:    br label %loopexit1
 ; EPILOG:       loopexit1:
-; EPILOG-NEXT:    %sext3 = phi i32 [ %sext3.ph, %loopexit1.loopexit ], [ %sext3.ph2, %loopexit1.loopexit1 ]
+; EPILOG-NEXT:    %sext3 = phi i32 [ %sext3.ph, %loopexit1.loopexit ], [ %sext3.ph3, %loopexit1.loopexit2 ]
 ; EPILOG-NEXT:    ret i32 %sext3
 ;
 ; EPILOG-BLOCK-LABEL: @test7(
@@ -4287,7 +4271,7 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; EPILOG-BLOCK-NEXT:    %2 = add i64 %1, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %1, 1
 ; EPILOG-BLOCK-NEXT:    %3 = icmp ult i64 %2, 1
-; EPILOG-BLOCK-NEXT:    br i1 %3, label %latchexit.unr-lcssa, label %preheader.new
+; EPILOG-BLOCK-NEXT:    br i1 %3, label %header.epil.preheader, label %preheader.new
 ; EPILOG-BLOCK:       preheader.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %1, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %header
@@ -4301,13 +4285,13 @@ define i32 @test7(i32 %arg, i32 %arg1, i32 %arg2) {
 ; EPILOG-BLOCK-NEXT:    %add.1 = add nuw nsw i64 %i6, 2
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %header, label %latchexit.unr-lcssa.loopexit, !llvm.loop !11
-; EPILOG-BLOCK:       latchexit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    br label %latchexit.unr-lcssa
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %header, label %latchexit.unr-lcssa, !llvm.loop !11
 ; EPILOG-BLOCK:       latchexit.unr-lcssa:
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %latchexit
 ; EPILOG-BLOCK:       header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-BLOCK-NEXT:    br label %header.epil
 ; EPILOG-BLOCK:       header.epil:
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %loopexit1, label %latch.epil
@@ -4480,7 +4464,7 @@ define void @test8() {
 ; EPILOG-NEXT:    br label %outerloop
 ; EPILOG:       outerloop.loopexit.loopexit:
 ; EPILOG-NEXT:    br label %outerloop.loopexit
-; EPILOG:       outerloop.loopexit.loopexit1:
+; EPILOG:       outerloop.loopexit.loopexit2:
 ; EPILOG-NEXT:    br label %outerloop.loopexit
 ; EPILOG:       outerloop.loopexit:
 ; EPILOG-NEXT:    br label %outerloop
@@ -4490,7 +4474,7 @@ define void @test8() {
 ; EPILOG-NEXT:    %1 = sub i64 99, %i
 ; EPILOG-NEXT:    %xtraiter = and i64 %0, 7
 ; EPILOG-NEXT:    %2 = icmp ult i64 %1, 7
-; EPILOG-NEXT:    br i1 %2, label %exit.unr-lcssa, label %outerloop.new
+; EPILOG-NEXT:    br i1 %2, label %innerH.epil.preheader, label %outerloop.new
 ; EPILOG:       outerloop.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-NEXT:    br label %innerH
@@ -4516,21 +4500,21 @@ define void @test8() {
 ; EPILOG:       latch.7:
 ; EPILOG-NEXT:    %niter.next.7 = add nuw nsw i64 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %innerH, label %exit.unr-lcssa.loopexit
-; EPILOG:       exit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %i3.unr.ph = phi i64 [ %i4.7, %latch.7 ]
-; EPILOG-NEXT:    br label %exit.unr-lcssa
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %innerH, label %exit.unr-lcssa
 ; EPILOG:       exit.unr-lcssa:
-; EPILOG-NEXT:    %i3.unr = phi i64 [ %i, %outerloop ], [ %i3.unr.ph, %exit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %i3.unr = phi i64 [ %i4.7, %latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %innerH.epil.preheader, label %exit.loopexit
 ; EPILOG:       innerH.epil.preheader:
+; EPILOG-NEXT:    %i3.epil.init = phi i64 [ %i, %outerloop ], [ %i3.unr, %exit.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-NEXT:    br label %innerH.epil
 ; EPILOG:       innerH.epil:
-; EPILOG-NEXT:    %i3.epil = phi i64 [ %i4.epil, %latch.epil ], [ %i3.unr, %innerH.epil.preheader ]
+; EPILOG-NEXT:    %i3.epil = phi i64 [ %i4.epil, %latch.epil ], [ %i3.epil.init, %innerH.epil.preheader ]
 ; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %innerH.epil.preheader ], [ %epil.iter.next, %latch.epil ]
 ; EPILOG-NEXT:    %i4.epil = add nuw nsw i64 %i3.epil, 1
-; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit1, label %latch.epil
+; EPILOG-NEXT:    br i1 false, label %outerloop.loopexit.loopexit2, label %latch.epil
 ; EPILOG:       latch.epil:
 ; EPILOG-NEXT:    %i6.epil = icmp ult i64 %i4.epil, 100
 ; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
@@ -4549,27 +4533,26 @@ define void @test8() {
 ; EPILOG-BLOCK:       outerloop.loopexit.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %outerloop.loopexit
 ; EPILOG-BLOCK:       outerloop.loopexit:
-; EPILOG-BLOCK-NEXT:    br i1 false, label %exit.unr-lcssa.1, label %outerloop.new.1
+; EPILOG-BLOCK-NEXT:    br i1 false, label %innerH.epil.preheader.1, label %outerloop.new.1
 ; EPILOG-BLOCK:       outerloop.new.1:
 ; EPILOG-BLOCK-NEXT:    br label %innerH.1
 ; EPILOG-BLOCK:       innerH.1:
 ; EPILOG-BLOCK-NEXT:    %i3.1 = phi i64 [ 0, %outerloop.new.1 ], [ %i4.1.1, %latch.1.1 ]
 ; EPILOG-BLOCK-NEXT:    %niter.1 = phi i64 [ 0, %outerloop.new.1 ], [ %niter.next.1.1, %latch.1.1 ]
-; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.12
-; EPILOG-BLOCK:       latch.12:
+; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.13
+; EPILOG-BLOCK:       latch.13:
 ; EPILOG-BLOCK-NEXT:    %i4.1.1 = add nuw nsw i64 %i3.1, 2
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.1.1
 ; EPILOG-BLOCK:       latch.1.1:
 ; EPILOG-BLOCK-NEXT:    %niter.next.1.1 = add i64 %niter.1, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1.1 = icmp ne i64 %niter.next.1.1, 100
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1.1, label %innerH.1, label %exit.unr-lcssa.loopexit.1, !llvm.loop !12
-; EPILOG-BLOCK:       exit.unr-lcssa.loopexit.1:
-; EPILOG-BLOCK-NEXT:    br label %exit.unr-lcssa.1
-; EPILOG-BLOCK:       outerloop.loopexit.loopexit.1:
-; EPILOG-BLOCK-NEXT:    br label %outerloop.loopexit.1
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1.1, label %innerH.1, label %exit.unr-lcssa.1, !llvm.loop !12
 ; EPILOG-BLOCK:       exit.unr-lcssa.1:
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %innerH.epil.preheader.1, label %exit.loopexit
+; EPILOG-BLOCK:       outerloop.loopexit.loopexit.1:
+; EPILOG-BLOCK-NEXT:    br label %outerloop.loopexit.1
 ; EPILOG-BLOCK:       innerH.epil.preheader.1:
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 false)
 ; EPILOG-BLOCK-NEXT:    br label %innerH.epil.1
 ; EPILOG-BLOCK:       innerH.epil.1:
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit.1, label %latch.epil
@@ -4581,7 +4564,7 @@ define void @test8() {
 ; EPILOG-BLOCK-NEXT:    %1 = sub i64 99, %i
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
 ; EPILOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %2, label %exit.unr-lcssa, label %outerloop.new
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %innerH.epil.preheader, label %outerloop.new
 ; EPILOG-BLOCK:       outerloop.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %innerH
@@ -4595,13 +4578,13 @@ define void @test8() {
 ; EPILOG-BLOCK:       latch.1:
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %innerH, label %exit.unr-lcssa.loopexit, !llvm.loop !12
-; EPILOG-BLOCK:       exit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    br label %exit.unr-lcssa
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %innerH, label %exit.unr-lcssa, !llvm.loop !12
 ; EPILOG-BLOCK:       exit.unr-lcssa:
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %innerH.epil.preheader, label %exit.loopexit
 ; EPILOG-BLOCK:       innerH.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-BLOCK-NEXT:    br label %innerH.epil
 ; EPILOG-BLOCK:       innerH.epil:
 ; EPILOG-BLOCK-NEXT:    br i1 false, label %outerloop.loopexit, label %latch.epil
@@ -4788,7 +4771,7 @@ define ptr addrspace(1) @test9(ptr nocapture readonly %arg, i32 %n) {
 ; EPILOG-NEXT:    %2 = add i32 %1, -1
 ; EPILOG-NEXT:    %xtraiter = and i32 %1, 7
 ; EPILOG-NEXT:    %3 = icmp ult i32 %2, 7
-; EPILOG-NEXT:    br i1 %3, label %outerLatch.loopexit.unr-lcssa, label %preheader.new
+; EPILOG-NEXT:    br i1 %3, label %header.epil.preheader, label %preheader.new
 ; EPILOG:       preheader.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i32 %1, %xtraiter
 ; EPILOG-NEXT:    br label %header
@@ -4799,11 +4782,11 @@ define ptr addrspace(1) @test9(ptr nocapture readonly %arg, i32 %n) {
 ; EPILOG:       innerexit.loopexit:
 ; EPILOG-NEXT:    %trip.lcssa.ph = phi i32 [ %trip, %header ], [ %trip, %latch ], [ %trip, %latch.1 ], [ %trip, %latch.2 ], [ %trip, %latch.3 ], [ %trip, %latch.4 ], [ %trip, %latch.5 ], [ %trip, %latch.6 ]
 ; EPILOG-NEXT:    br label %innerexit
-; EPILOG:       innerexit.loopexit1:
-; EPILOG-NEXT:    %trip.lcssa.ph2 = phi i32 [ %trip, %header.epil ]
+; EPILOG:       innerexit.loopexit2:
+; EPILOG-NEXT:    %trip.lcssa.ph3 = phi i32 [ %trip, %header.epil ]
 ; EPILOG-NEXT:    br label %innerexit
 ; EPILOG:       innerexit:
-; EPILOG-NEXT:    %trip.lcssa = phi i32 [ %trip.lcssa.ph, %innerexit.loopexit ], [ %trip.lcssa.ph2, %innerexit.loopexit1 ]
+; EPILOG-NEXT:    %trip.lcssa = phi i32 [ %trip.lcssa.ph, %innerexit.loopexit ], [ %trip.lcssa.ph3, %innerexit.loopexit2 ]
 ; EPILOG-NEXT:    %i9 = call ptr addrspace(1) @foo(i32 %trip.lcssa)
 ; EPILOG-NEXT:    ret ptr addrspace(1) %i9
 ; EPILOG:       latch:
@@ -4824,21 +4807,21 @@ define ptr addrspace(1) @test9(ptr nocapture readonly %arg, i32 %n) {
 ; EPILOG-NEXT:    %iv.next.7 = add nuw nsw i64 %phi, 8
 ; EPILOG-NEXT:    %niter.next.7 = add i32 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i32 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %header, label %outerLatch.loopexit.unr-lcssa.loopexit
-; EPILOG:       outerLatch.loopexit.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %phi.unr.ph = phi i64 [ %iv.next.7, %latch.7 ]
-; EPILOG-NEXT:    br label %outerLatch.loopexit.unr-lcssa
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %header, label %outerLatch.loopexit.unr-lcssa
 ; EPILOG:       outerLatch.loopexit.unr-lcssa:
-; EPILOG-NEXT:    %phi.unr = phi i64 [ %i4, %preheader ], [ %phi.unr.ph, %outerLatch.loopexit.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %phi.unr = phi i64 [ %iv.next.7, %latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i32 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %outerLatch.loopexit
 ; EPILOG:       header.epil.preheader:
+; EPILOG-NEXT:    %phi.epil.init = phi i64 [ %i4, %preheader ], [ %phi.unr, %outerLatch.loopexit.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod1 = icmp ne i32 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-NEXT:    br label %header.epil
 ; EPILOG:       header.epil:
-; EPILOG-NEXT:    %phi.epil = phi i64 [ %phi.unr, %header.epil.preheader ], [ %iv.next.epil, %latch.epil ]
+; EPILOG-NEXT:    %phi.epil = phi i64 [ %phi.epil.init, %header.epil.preheader ], [ %iv.next.epil, %latch.epil ]
 ; EPILOG-NEXT:    %epil.iter = phi i32 [ 0, %header.epil.preheader ], [ %epil.iter.next, %latch.epil ]
 ; EPILOG-NEXT:    %i7.epil = trunc i64 %phi.epil to i32
-; EPILOG-NEXT:    br i1 true, label %latch.epil, label %innerexit.loopexit1
+; EPILOG-NEXT:    br i1 true, label %latch.epil, label %innerexit.loopexit2
 ; EPILOG:       latch.epil:
 ; EPILOG-NEXT:    %i11.epil = add nsw i32 %i7.epil, 1
 ; EPILOG-NEXT:    %innercnd.epil = icmp slt i32 %i11.epil, %trip
@@ -4866,7 +4849,7 @@ define ptr addrspace(1) @test9(ptr nocapture readonly %arg, i32 %n) {
 ; EPILOG-BLOCK-NEXT:    %2 = add i32 %1, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i32 %1, 1
 ; EPILOG-BLOCK-NEXT:    %3 = icmp ult i32 %2, 1
-; EPILOG-BLOCK-NEXT:    br i1 %3, label %outerLatch.loopexit.unr-lcssa, label %preheader.new
+; EPILOG-BLOCK-NEXT:    br i1 %3, label %header.epil.preheader, label %preheader.new
 ; EPILOG-BLOCK:       preheader.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i32 %1, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %header
@@ -4877,17 +4860,17 @@ define ptr addrspace(1) @test9(ptr nocapture readonly %arg, i32 %n) {
 ; EPILOG-BLOCK:       innerexit.loopexit.loopexit:
 ; EPILOG-BLOCK-NEXT:    %trip.lcssa.ph.ph = phi i32 [ %trip, %latch ], [ %trip, %header ]
 ; EPILOG-BLOCK-NEXT:    br label %innerexit.loopexit
-; EPILOG-BLOCK:       innerexit.loopexit.loopexit4:
-; EPILOG-BLOCK-NEXT:    %trip.lcssa.ph.ph5 = phi i32 [ %trip.1, %latch.13 ], [ %trip.1, %header.1 ]
+; EPILOG-BLOCK:       innerexit.loopexit.loopexit5:
+; EPILOG-BLOCK-NEXT:    %trip.lcssa.ph.ph6 = phi i32 [ %trip.1, %latch.14 ], [ %trip.1, %header.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %innerexit.loopexit
 ; EPILOG-BLOCK:       innerexit.loopexit:
-; EPILOG-BLOCK-NEXT:    %trip.lcssa.ph = phi i32 [ %trip.lcssa.ph.ph, %innerexit.loopexit.loopexit ], [ %trip.lcssa.ph.ph5, %innerexit.loopexit.loopexit4 ]
+; EPILOG-BLOCK-NEXT:    %trip.lcssa.ph = phi i32 [ %trip.lcssa.ph.ph, %innerexit.loopexit.loopexit ], [ %trip.lcssa.ph.ph6, %innerexit.loopexit.loopexit5 ]
 ; EPILOG-BLOCK-NEXT:    br label %innerexit
-; EPILOG-BLOCK:       innerexit.loopexit1:
-; EPILOG-BLOCK-NEXT:    %trip.lcssa.ph2 = phi i32 [ %trip, %header.epil ], [ %trip.1, %header.epil.1 ]
+; EPILOG-BLOCK:       innerexit.loopexit2:
+; EPILOG-BLOCK-NEXT:    %trip.lcssa.ph3 = phi i32 [ %trip, %header.epil ], [ %trip.1, %header.epil.1 ]
 ; EPILOG-BLOCK-NEXT:    br label %innerexit
 ; EPILOG-BLOCK:       innerexit:
-; EPILOG-BLOCK-NEXT:    %trip.lcssa = phi i32 [ %trip.lcssa.ph, %innerexit.loopexit ], [ %trip.lcssa.ph2, %innerexit.loopexit1 ]
+; EPILOG-BLOCK-NEXT:    %trip.lcssa = phi i32 [ %trip.lcssa.ph, %innerexit.loopexit ], [ %trip.lcssa.ph3, %innerexit.loopexit2 ]
 ; EPILOG-BLOCK-NEXT:    %i9 = call ptr addrspace(1) @foo(i32 %trip.lcssa)
 ; EPILOG-BLOCK-NEXT:    ret ptr addrspace(1) %i9
 ; EPILOG-BLOCK:       latch:
@@ -4896,16 +4879,16 @@ define ptr addrspace(1) @test9(ptr nocapture readonly %arg, i32 %n) {
 ; EPILOG-BLOCK-NEXT:    %iv.next.1 = add nuw nsw i64 %phi, 2
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i32 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i32 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %header, label %outerLatch.loopexit.unr-lcssa.loopexit, !llvm.loop !14
-; EPILOG-BLOCK:       outerLatch.loopexit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    br label %outerLatch.loopexit.unr-lcssa
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %header, label %outerLatch.loopexit.unr-lcssa, !llvm.loop !14
 ; EPILOG-BLOCK:       outerLatch.loopexit.unr-lcssa:
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i32 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %header.epil.preheader, label %outerLatch.loopexit
 ; EPILOG-BLOCK:       header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %lcmp.mod1 = icmp ne i32 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-BLOCK-NEXT:    br label %header.epil
 ; EPILOG-BLOCK:       header.epil:
-; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.epil, label %innerexit.loopexit1
+; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.epil, label %innerexit.loopexit2
 ; EPILOG-BLOCK:       latch.epil:
 ; EPILOG-BLOCK-NEXT:    br label %outerLatch.loopexit
 ; EPILOG-BLOCK:       outerLatch.loopexit:
@@ -4919,30 +4902,30 @@ define ptr addrspace(1) @test9(ptr nocapture readonly %arg, i32 %n) {
 ; EPILOG-BLOCK-NEXT:    %5 = add i32 %4, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter.1 = and i32 %4, 1
 ; EPILOG-BLOCK-NEXT:    %6 = icmp ult i32 %5, 1
-; EPILOG-BLOCK-NEXT:    br i1 %6, label %outerLatch.loopexit.unr-lcssa.1, label %preheader.new.1
+; EPILOG-BLOCK-NEXT:    br i1 %6, label %header.epil.preheader.1, label %preheader.new.1
 ; EPILOG-BLOCK:       preheader.new.1:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter.1 = sub i32 %4, %xtraiter.1
 ; EPILOG-BLOCK-NEXT:    br label %header.1
 ; EPILOG-BLOCK:       header.1:
 ; EPILOG-BLOCK-NEXT:    %phi.1 = phi i64 [ 0, %preheader.new.1 ], [ %iv.next.1.1, %latch.1.1 ]
 ; EPILOG-BLOCK-NEXT:    %niter.1 = phi i32 [ 0, %preheader.new.1 ], [ %niter.next.1.1, %latch.1.1 ]
-; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.13, label %innerexit.loopexit.loopexit4
-; EPILOG-BLOCK:       latch.13:
-; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.1.1, label %innerexit.loopexit.loopexit4
+; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.14, label %innerexit.loopexit.loopexit5
+; EPILOG-BLOCK:       latch.14:
+; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.1.1, label %innerexit.loopexit.loopexit5
 ; EPILOG-BLOCK:       latch.1.1:
 ; EPILOG-BLOCK-NEXT:    %iv.next.1.1 = add nuw nsw i64 %phi.1, 2
 ; EPILOG-BLOCK-NEXT:    %niter.next.1.1 = add i32 %niter.1, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1.1 = icmp ne i32 %niter.next.1.1, %unroll_iter.1
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1.1, label %header.1, label %outerLatch.loopexit.unr-lcssa.loopexit.1, !llvm.loop !14
-; EPILOG-BLOCK:       outerLatch.loopexit.unr-lcssa.loopexit.1:
-; EPILOG-BLOCK-NEXT:    br label %outerLatch.loopexit.unr-lcssa.1
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1.1, label %header.1, label %outerLatch.loopexit.unr-lcssa.1, !llvm.loop !14
 ; EPILOG-BLOCK:       outerLatch.loopexit.unr-lcssa.1:
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod.1 = icmp ne i32 %xtraiter.1, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod.1, label %header.epil.preheader.1, label %outerLatch.loopexit.1
 ; EPILOG-BLOCK:       header.epil.preheader.1:
+; EPILOG-BLOCK-NEXT:    %lcmp.mod1.1 = icmp ne i32 %xtraiter.1, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod1.1)
 ; EPILOG-BLOCK-NEXT:    br label %header.epil.1
 ; EPILOG-BLOCK:       header.epil.1:
-; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.epil.1, label %innerexit.loopexit1
+; EPILOG-BLOCK-NEXT:    br i1 true, label %latch.epil.1, label %innerexit.loopexit2
 ; EPILOG-BLOCK:       latch.epil.1:
 ; EPILOG-BLOCK-NEXT:    br label %outerLatch.loopexit.1
 ; EPILOG-BLOCK:       outerLatch.loopexit.1:
@@ -5171,7 +5154,7 @@ define void @test10(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-NEXT:    %xtraiter = and i64 %0, 7
 ; EPILOG-NEXT:    %2 = icmp ult i64 %1, 7
-; EPILOG-NEXT:    br i1 %2, label %exit2.unr-lcssa, label %entry.new
+; EPILOG-NEXT:    br i1 %2, label %loop_header.epil.preheader, label %entry.new
 ; EPILOG:       entry.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-NEXT:    br label %loop_header
@@ -5220,28 +5203,28 @@ define void @test10(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    %iv_next.7 = add i64 %iv, 8
 ; EPILOG-NEXT:    %niter.next.7 = add i64 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit2.unr-lcssa.loopexit
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit2.unr-lcssa
 ; EPILOG:       exit1.loopexit:
 ; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1.loopexit1:
+; EPILOG:       exit1.loopexit2:
 ; EPILOG-NEXT:    br label %exit1
 ; EPILOG:       exit1:
 ; EPILOG-NEXT:    ret void
-; EPILOG:       exit2.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    br label %exit2.unr-lcssa
 ; EPILOG:       exit2.unr-lcssa:
-; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %iv.unr = phi i64 [ %iv_next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2
 ; EPILOG:       loop_header.epil.preheader:
+; EPILOG-NEXT:    %iv.epil.init = phi i64 [ 0, %entry ], [ %iv.unr, %exit2.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-NEXT:    br label %loop_header.epil
 ; EPILOG:       loop_header.epil:
-; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.epil.init, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
 ; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %loop_header.epil.preheader ], [ %epil.iter.next, %loop_latch.epil ]
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.epil = icmp ne i64 %iv.epil, %trip2
-; EPILOG-NEXT:    br i1 %cmp_early.epil, label %loop_latch.epil, label %exit1.loopexit1
+; EPILOG-NEXT:    br i1 %cmp_early.epil, label %loop_latch.epil, label %exit1.loopexit2
 ; EPILOG:       loop_latch.epil:
 ; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
 ; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
@@ -5259,7 +5242,7 @@ define void @test10(i64 %trip, i64 %trip2) {
 ; EPILOG-BLOCK-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
 ; EPILOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %2, label %exit2.unr-lcssa, label %entry.new
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %loop_header.epil.preheader, label %entry.new
 ; EPILOG-BLOCK:       entry.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %loop_header
@@ -5278,23 +5261,23 @@ define void @test10(i64 %trip, i64 %trip2) {
 ; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv, 2
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.unr-lcssa.loopexit, !llvm.loop !16
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.unr-lcssa, !llvm.loop !16
 ; EPILOG-BLOCK:       exit1.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit1
 ; EPILOG-BLOCK:       exit1:
 ; EPILOG-BLOCK-NEXT:    ret void
-; EPILOG-BLOCK:       exit2.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.1, %loop_latch.1 ]
-; EPILOG-BLOCK-NEXT:    br label %exit2.unr-lcssa
 ; EPILOG-BLOCK:       exit2.unr-lcssa:
-; EPILOG-BLOCK-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.unr-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %iv.unr = phi i64 [ %iv_next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2
 ; EPILOG-BLOCK:       loop_header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %iv.epil.init = phi i64 [ 0, %entry ], [ %iv.unr, %exit2.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-BLOCK-NEXT:    br label %loop_header.epil
 ; EPILOG-BLOCK:       loop_header.epil:
 ; EPILOG-BLOCK-NEXT:    call void @bar()
-; EPILOG-BLOCK-NEXT:    %cmp_early.epil = icmp ne i64 %iv.unr, %trip2
+; EPILOG-BLOCK-NEXT:    %cmp_early.epil = icmp ne i64 %iv.epil.init, %trip2
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp_early.epil, label %loop_latch.epil, label %exit1
 ; EPILOG-BLOCK:       loop_latch.epil:
 ; EPILOG-BLOCK-NEXT:    br label %exit2
@@ -5460,7 +5443,7 @@ define void @test11(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-NEXT:    %xtraiter = and i64 %0, 7
 ; EPILOG-NEXT:    %2 = icmp ult i64 %1, 7
-; EPILOG-NEXT:    br i1 %2, label %exit2.unr-lcssa, label %entry.new
+; EPILOG-NEXT:    br i1 %2, label %loop_header.epil.preheader, label %entry.new
 ; EPILOG:       entry.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-NEXT:    br label %loop_header
@@ -5494,27 +5477,27 @@ define void @test11(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    %iv_next.7 = add i64 %iv, 8
 ; EPILOG-NEXT:    %niter.next.7 = add i64 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit2.unr-lcssa.loopexit
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit2.unr-lcssa
 ; EPILOG:       exit1.loopexit:
 ; EPILOG-NEXT:    br label %exit1
-; EPILOG:       exit1.loopexit1:
+; EPILOG:       exit1.loopexit2:
 ; EPILOG-NEXT:    br label %exit1
 ; EPILOG:       exit1:
 ; EPILOG-NEXT:    ret void
-; EPILOG:       exit2.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    br label %exit2.unr-lcssa
 ; EPILOG:       exit2.unr-lcssa:
-; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %iv.unr = phi i64 [ %iv_next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2
 ; EPILOG:       loop_header.epil.preheader:
+; EPILOG-NEXT:    %iv.epil.init = phi i64 [ 0, %entry ], [ %iv.unr, %exit2.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-NEXT:    br label %loop_header.epil
 ; EPILOG:       loop_header.epil:
-; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.epil.init, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
 ; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %loop_header.epil.preheader ], [ %epil.iter.next, %loop_latch.epil ]
 ; EPILOG-NEXT:    call void @bar()
-; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %exit1.loopexit1
+; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %exit1.loopexit2
 ; EPILOG:       loop_latch.epil:
 ; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
 ; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
@@ -5532,7 +5515,7 @@ define void @test11(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
 ; EPILOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %2, label %exit2.unr-lcssa, label %entry.new
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %loop_header.epil.preheader, label %entry.new
 ; EPILOG-BLOCK:       entry.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %loop_header
@@ -5548,17 +5531,17 @@ define void @test11(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv, 2
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.unr-lcssa.loopexit, !llvm.loop !17
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit2.unr-lcssa, !llvm.loop !17
 ; EPILOG-BLOCK:       exit1.loopexit:
 ; EPILOG-BLOCK-NEXT:    br label %exit1
 ; EPILOG-BLOCK:       exit1:
 ; EPILOG-BLOCK-NEXT:    ret void
-; EPILOG-BLOCK:       exit2.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    br label %exit2.unr-lcssa
 ; EPILOG-BLOCK:       exit2.unr-lcssa:
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2
 ; EPILOG-BLOCK:       loop_header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-BLOCK-NEXT:    br label %loop_header.epil
 ; EPILOG-BLOCK:       loop_header.epil:
 ; EPILOG-BLOCK-NEXT:    call void @bar()
@@ -5706,7 +5689,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-NEXT:    %xtraiter = and i64 %0, 7
 ; EPILOG-NEXT:    %2 = icmp ult i64 %1, 7
-; EPILOG-NEXT:    br i1 %2, label %exit1.unr-lcssa, label %entry.new
+; EPILOG-NEXT:    br i1 %2, label %loop_header.epil.preheader, label %entry.new
 ; EPILOG:       entry.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-NEXT:    br label %loop_header
@@ -5771,33 +5754,33 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-NEXT:    %iv_next.7 = add i64 %iv, 8
 ; EPILOG-NEXT:    %niter.next.7 = add i64 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit1.unr-lcssa.loopexit
-; EPILOG:       exit1.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    br label %exit1.unr-lcssa
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit1.unr-lcssa
 ; EPILOG:       exit1.unr-lcssa:
-; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit1.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %iv.unr = phi i64 [ %iv_next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit1
 ; EPILOG:       loop_header.epil.preheader:
+; EPILOG-NEXT:    %iv.epil.init = phi i64 [ 0, %entry ], [ %iv.unr, %exit1.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-NEXT:    br label %loop_header.epil
 ; EPILOG:       loop_header.epil:
-; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.epil.init, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
 ; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %loop_header.epil.preheader ], [ %epil.iter.next, %loop_latch.epil ]
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.epil = icmp ne i64 %iv.epil, %trip2
-; EPILOG-NEXT:    br i1 %cmp_early.epil, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa.loopexit1
+; EPILOG-NEXT:    br i1 %cmp_early.epil, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa.loopexit2
 ; EPILOG:       loop_exiting_bb2.epil:
-; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %exit1.epilog-lcssa.loopexit1
+; EPILOG-NEXT:    br i1 %cond, label %loop_latch.epil, label %exit1.epilog-lcssa.loopexit2
 ; EPILOG:       loop_latch.epil:
 ; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
 ; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
 ; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
 ; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit1.epilog-lcssa.loopexit1, !llvm.loop !16
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit1.epilog-lcssa.loopexit2, !llvm.loop !16
 ; EPILOG:       exit1.epilog-lcssa.loopexit:
 ; EPILOG-NEXT:    br label %exit1.epilog-lcssa
-; EPILOG:       exit1.epilog-lcssa.loopexit1:
+; EPILOG:       exit1.epilog-lcssa.loopexit2:
 ; EPILOG-NEXT:    br label %exit1.epilog-lcssa
 ; EPILOG:       exit1.epilog-lcssa:
 ; EPILOG-NEXT:    br label %exit1
@@ -5810,7 +5793,7 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
 ; EPILOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %2, label %exit1.unr-lcssa, label %entry.new
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %loop_header.epil.preheader, label %entry.new
 ; EPILOG-BLOCK:       entry.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %loop_header
@@ -5833,19 +5816,19 @@ define void @test12(i64 %trip, i64 %trip2, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv, 2
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit1.unr-lcssa.loopexit, !llvm.loop !18
-; EPILOG-BLOCK:       exit1.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.1, %loop_latch.1 ]
-; EPILOG-BLOCK-NEXT:    br label %exit1.unr-lcssa
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit1.unr-lcssa, !llvm.loop !18
 ; EPILOG-BLOCK:       exit1.unr-lcssa:
-; EPILOG-BLOCK-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit1.unr-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %iv.unr = phi i64 [ %iv_next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit1
 ; EPILOG-BLOCK:       loop_header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %iv.epil.init = phi i64 [ 0, %entry ], [ %iv.unr, %exit1.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-BLOCK-NEXT:    br label %loop_header.epil
 ; EPILOG-BLOCK:       loop_header.epil:
 ; EPILOG-BLOCK-NEXT:    call void @bar()
-; EPILOG-BLOCK-NEXT:    %cmp_early.epil = icmp ne i64 %iv.unr, %trip2
+; EPILOG-BLOCK-NEXT:    %cmp_early.epil = icmp ne i64 %iv.epil.init, %trip2
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp_early.epil, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa
 ; EPILOG-BLOCK:       loop_exiting_bb2.epil:
 ; EPILOG-BLOCK-NEXT:    br i1 %cond, label %loop_latch.epil, label %exit1.epilog-lcssa
@@ -6038,7 +6021,7 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-NEXT:    %xtraiter = and i64 %0, 7
 ; EPILOG-NEXT:    %2 = icmp ult i64 %1, 7
-; EPILOG-NEXT:    br i1 %2, label %exit1.unr-lcssa, label %entry.new
+; EPILOG-NEXT:    br i1 %2, label %loop_header.epil.preheader, label %entry.new
 ; EPILOG:       entry.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-NEXT:    br label %loop_header
@@ -6111,34 +6094,34 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-NEXT:    %iv_next.7 = add i64 %iv, 8
 ; EPILOG-NEXT:    %niter.next.7 = add i64 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit1.unr-lcssa.loopexit
-; EPILOG:       exit1.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    br label %exit1.unr-lcssa
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit1.unr-lcssa
 ; EPILOG:       exit1.unr-lcssa:
-; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit1.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %iv.unr = phi i64 [ %iv_next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit1
 ; EPILOG:       loop_header.epil.preheader:
+; EPILOG-NEXT:    %iv.epil.init = phi i64 [ 0, %entry ], [ %iv.unr, %exit1.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-NEXT:    br label %loop_header.epil
 ; EPILOG:       loop_header.epil:
-; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.epil.init, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
 ; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %loop_header.epil.preheader ], [ %epil.iter.next, %loop_latch.epil ]
 ; EPILOG-NEXT:    call void @bar()
 ; EPILOG-NEXT:    %cmp_early.epil = icmp ne i64 %iv.epil, %trip2
-; EPILOG-NEXT:    br i1 %cmp_early.epil, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa.loopexit1
+; EPILOG-NEXT:    br i1 %cmp_early.epil, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa.loopexit2
 ; EPILOG:       loop_exiting_bb2.epil:
 ; EPILOG-NEXT:    %unknown.epil = call i1 @unknown_cond()
-; EPILOG-NEXT:    br i1 %unknown.epil, label %loop_latch.epil, label %exit1.epilog-lcssa.loopexit1
+; EPILOG-NEXT:    br i1 %unknown.epil, label %loop_latch.epil, label %exit1.epilog-lcssa.loopexit2
 ; EPILOG:       loop_latch.epil:
 ; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
 ; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
 ; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
 ; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit1.epilog-lcssa.loopexit1, !llvm.loop !17
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit1.epilog-lcssa.loopexit2, !llvm.loop !17
 ; EPILOG:       exit1.epilog-lcssa.loopexit:
 ; EPILOG-NEXT:    br label %exit1.epilog-lcssa
-; EPILOG:       exit1.epilog-lcssa.loopexit1:
+; EPILOG:       exit1.epilog-lcssa.loopexit2:
 ; EPILOG-NEXT:    br label %exit1.epilog-lcssa
 ; EPILOG:       exit1.epilog-lcssa:
 ; EPILOG-NEXT:    br label %exit1
@@ -6151,7 +6134,7 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-BLOCK-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
 ; EPILOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %2, label %exit1.unr-lcssa, label %entry.new
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %loop_header.epil.preheader, label %entry.new
 ; EPILOG-BLOCK:       entry.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %loop_header
@@ -6176,19 +6159,19 @@ define void @test13(i64 %trip, i64 %trip2) {
 ; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv, 2
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit1.unr-lcssa.loopexit, !llvm.loop !19
-; EPILOG-BLOCK:       exit1.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.1, %loop_latch.1 ]
-; EPILOG-BLOCK-NEXT:    br label %exit1.unr-lcssa
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit1.unr-lcssa, !llvm.loop !19
 ; EPILOG-BLOCK:       exit1.unr-lcssa:
-; EPILOG-BLOCK-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit1.unr-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT:    %iv.unr = phi i64 [ %iv_next.1, %loop_latch.1 ]
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit1
 ; EPILOG-BLOCK:       loop_header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %iv.epil.init = phi i64 [ 0, %entry ], [ %iv.unr, %exit1.unr-lcssa ]
+; EPILOG-BLOCK-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-BLOCK-NEXT:    br label %loop_header.epil
 ; EPILOG-BLOCK:       loop_header.epil:
 ; EPILOG-BLOCK-NEXT:    call void @bar()
-; EPILOG-BLOCK-NEXT:    %cmp_early.epil = icmp ne i64 %iv.unr, %trip2
+; EPILOG-BLOCK-NEXT:    %cmp_early.epil = icmp ne i64 %iv.epil.init, %trip2
 ; EPILOG-BLOCK-NEXT:    br i1 %cmp_early.epil, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa
 ; EPILOG-BLOCK:       loop_exiting_bb2.epil:
 ; EPILOG-BLOCK-NEXT:    %unknown.epil = call i1 @unknown_cond()
@@ -6393,7 +6376,7 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-NEXT:    %xtraiter = and i64 %0, 7
 ; EPILOG-NEXT:    %2 = icmp ult i64 %1, 7
-; EPILOG-NEXT:    br i1 %2, label %exit1.unr-lcssa, label %entry.new
+; EPILOG-NEXT:    br i1 %2, label %loop_header.epil.preheader, label %entry.new
 ; EPILOG:       entry.new:
 ; EPILOG-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-NEXT:    br label %loop_header
@@ -6451,33 +6434,33 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-NEXT:    %iv_next.7 = add i64 %iv, 8
 ; EPILOG-NEXT:    %niter.next.7 = add i64 %niter, 8
 ; EPILOG-NEXT:    %niter.ncmp.7 = icmp ne i64 %niter.next.7, %unroll_iter
-; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit1.unr-lcssa.loopexit
-; EPILOG:       exit1.unr-lcssa.loopexit:
-; EPILOG-NEXT:    %iv.unr.ph = phi i64 [ %iv_next.7, %loop_latch.7 ]
-; EPILOG-NEXT:    br label %exit1.unr-lcssa
+; EPILOG-NEXT:    br i1 %niter.ncmp.7, label %loop_header, label %exit1.unr-lcssa
 ; EPILOG:       exit1.unr-lcssa:
-; EPILOG-NEXT:    %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit1.unr-lcssa.loopexit ]
+; EPILOG-NEXT:    %iv.unr = phi i64 [ %iv_next.7, %loop_latch.7 ]
 ; EPILOG-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit1
 ; EPILOG:       loop_header.epil.preheader:
+; EPILOG-NEXT:    %iv.epil.init = phi i64 [ 0, %entry ], [ %iv.unr, %exit1.unr-lcssa ]
+; EPILOG-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-NEXT:    br label %loop_header.epil
 ; EPILOG:       loop_header.epil:
-; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
+; EPILOG-NEXT:    %iv.epil = phi i64 [ %iv.epil.init, %loop_header.epil.preheader ], [ %iv_next.epil, %loop_latch.epil ]
 ; EPILOG-NEXT:    %epil.iter = phi i64 [ 0, %loop_header.epil.preheader ], [ %epil.iter.next, %loop_latch.epil ]
 ; EPILOG-NEXT:    call void @bar()
-; EPILOG-NEXT:    br i1 %cond, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa.loopexit1
+; EPILOG-NEXT:    br i1 %cond, label %loop_exiting_bb2.epil, label %exit1.epilog-lcssa.loopexit2
 ; EPILOG:       loop_exiting_bb2.epil:
 ; EPILOG-NEXT:    %unknown.epil = call i1 @unknown_cond()
-; EPILOG-NEXT:    br i1 %unknown.epil, label %loop_latch.epil, label %exit1.epilog-lcssa.loopexit1
+; EPILOG-NEXT:    br i1 %unknown.epil, label %loop_latch.epil, label %exit1.epilog-lcssa.loopexit2
 ; EPILOG:       loop_latch.epil:
 ; EPILOG-NEXT:    %iv_next.epil = add i64 %iv.epil, 1
 ; EPILOG-NEXT:    %cmp.epil = icmp ne i64 %iv_next.epil, %trip
 ; EPILOG-NEXT:    %epil.iter.next = add i64 %epil.iter, 1
 ; EPILOG-NEXT:    %epil.iter.cmp = icmp ne i64 %epil.iter.next, %xtraiter
-; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit1.epilog-lcssa.loopexit1, !llvm.loop !18
+; EPILOG-NEXT:    br i1 %epil.iter.cmp, label %loop_header.epil, label %exit1.epilog-lcssa.loopexit2, !llvm.loop !18
 ; EPILOG:       exit1.epilog-lcssa.loopexit:
 ; EPILOG-NEXT:    br label %exit1.epilog-lcssa
-; EPILOG:       exit1.epilog-lcssa.loopexit1:
+; EPILOG:       exit1.epilog-lcssa.loopexit2:
 ; EPILOG-NEXT:    br label %exit1.epilog-lcssa
 ; EPILOG:       exit1.epilog-lcssa:
 ; EPILOG-NEXT:    br label %exit1
@@ -6490,7 +6473,7 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %1 = add i64 %0, -1
 ; EPILOG-BLOCK-NEXT:    %xtraiter = and i64 %0, 1
 ; EPILOG-BLOCK-NEXT:    %2 = icmp ult i64 %1, 1
-; EPILOG-BLOCK-NEXT:    br i1 %2, label %exit1.unr-lcssa, label %entry.new
+; EPILOG-BLOCK-NEXT:    br i1 %2, label %loop_header.epil.preheader, label %entry.new
 ; EPILOG-BLOCK:       entry.new:
 ; EPILOG-BLOCK-NEXT:    %unroll_iter = sub i64 %0, %xtraiter
 ; EPILOG-BLOCK-NEXT:    br label %loop_header
@@ -6512,13 +6495,13 @@ define void @test14(i64 %trip, i1 %cond) {
 ; EPILOG-BLOCK-NEXT:    %iv_next.1 = add i64 %iv, 2
 ; EPILOG-BLOCK-NEXT:    %niter.next.1 = add i64 %niter, 2
 ; EPILOG-BLOCK-NEXT:    %niter.ncmp.1 = icmp ne i64 %niter.next.1, %unroll_iter
-; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit1.unr-lcssa.loopexit, !llvm.loop !20
-; EPILOG-BLOCK:       exit1.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT:    br label %exit1.unr-lcssa
+; EPILOG-BLOCK-NEXT:    br i1 %niter.ncmp.1, label %loop_header, label %exit1.unr-lcssa, !llvm.loop !20
 ; EPILOG-BLOCK:       exit1.unr-lcssa:
 ; EPILOG-BLOCK-NEXT:    %lcmp.mod = icmp ne i64 %xtraiter, 0
 ; EPILOG-BLOCK-NEXT:    br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit1
 ; EPILOG-BLOCK:       loop_header.epil.preheader:
+; EPILOG-BLOCK-NEXT:    %lcmp.mod1 = icmp ne i64 %xtraiter, 0
+; EPILOG-BLOCK-NEXT:    call void @llvm.assume(i1 %lcmp.mod1)
 ; EPILOG-BLOCK-NEXT:    br label %loop_header.epil
 ; EPILOG-BLOCK:       loop_header.epil:
 ; EPILOG-BLOCK-NEXT:    call void @bar()
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
index 8acf74a..492de06 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -22,7 +22,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; EPILOG:   br i1 %cmp1, label %for.end, label %for.body.preheader, !prof [[EPILOG_PROF_0:![0-9]+]]
 ; EPILOG: for.body.preheader:
 ; EPILOG:   %xtraiter = and i32 %n
-; EPILOG:   br i1 %1, label %for.end.loopexit.unr-lcssa, label %for.body.preheader.new, !prof [[EPILOG_PROF_1:![0-9]+]]
+; EPILOG:   br i1 %1, label %for.body.epil.preheader, label %for.body.preheader.new, !prof [[EPILOG_PROF_1:![0-9]+]]
 
 ; EPILOG: for.end.loopexit.unr-lcssa:
 ; EPILOG:   %lcmp.mod = icmp ne i32 %xtraiter, 0
@@ -41,7 +41,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; NOPROLOG-NOT: %xtraiter = and i32 %n
 
 ; EPILOG: for.body.epil:
-; EPILOG:   %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ],  [ %indvars.iv.unr, %for.body.epil.preheader ]
+; EPILOG:   %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ],  [ %indvars.iv.epil.init, %for.body.epil.preheader ]
 ; EPILOG:   %epil.iter.next = add i32 %epil.iter, 1
 ; EPILOG:   %epil.iter.cmp = icmp ne i32 %epil.iter.next, %xtraiter
 ; EPILOG:   br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !prof [[EPILOG_PROF_3:![0-9]+]], !llvm.loop [[EPILOG_LOOP:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll
index 492ddd1..0eeb3ad 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -8,9 +8,9 @@
 
 
 ; EPILOG: for.body.preheader:
-; EPILOG:   br i1 %1, label %for.end.loopexit.unr-lcssa, label %for.body.preheader.new, !dbg [[PH_LOC:![0-9]+]]
+; EPILOG:   br i1 %1, label %for.body.epil.preheader, label %for.body.preheader.new, !dbg [[PH_LOC:![0-9]+]]
 ; EPILOG: for.body:
-; EPILOG:   br i1 %niter.ncmp.1, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !dbg [[PH_LOC]]
+; EPILOG:   br i1 %niter.ncmp.1, label %for.end.loopexit.unr-lcssa, label %for.body, !dbg [[PH_LOC]]
 ; EPILOG-NOT: br i1 %niter.ncmp.2, label %for.end.loopexit{{.*}}, label %for.body
 ; EPILOG: for.body.epil.preheader:
 ; EPILOG:   br label %for.body.epil, !dbg [[PH_LOC]]
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll
index 0e11fff..a573de2 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll
@@ -8,8 +8,8 @@
 ; This test makes sure we're not unrolling 'odd' counts
 
 ; EPILOG: for.body:
-; EPILOG: br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa.loopexit{{.*}}, label %for.body
-; EPILOG-NOT: br i1 %niter.ncmp.4, label %for.end.loopexit.unr-lcssa.loopexit{{.*}}, label %for.body
+; EPILOG: br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa{{.*}}, label %for.body
+; EPILOG-NOT: br i1 %niter.ncmp.4, label %for.end.loopexit.unr-lcssa{{.*}}, label %for.body
 ; EPILOG: for.body.epil:
 
 ; PROLOG: for.body.prol:
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
index fa9f902..0cee4e2 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop5.ll
@@ -69,7 +69,7 @@ define i3 @test(ptr %a, i3 %n) {
 ; UNROLL-4-NEXT:    [[TMP0:%.*]] = add i3 [[N]], -1
 ; UNROLL-4-NEXT:    [[XTRAITER:%.*]] = and i3 [[N]], 3
 ; UNROLL-4-NEXT:    [[TMP1:%.*]] = icmp ult i3 [[TMP0]], 3
-; UNROLL-4-NEXT:    br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
+; UNROLL-4-NEXT:    br i1 [[TMP1]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
 ; UNROLL-4:       for.body.preheader.new:
 ; UNROLL-4-NEXT:    [[UNROLL_ITER:%.*]] = sub i3 [[N]], [[XTRAITER]]
 ; UNROLL-4-NEXT:    br label [[FOR_BODY:%.*]]
@@ -95,23 +95,22 @@ define i3 @test(ptr %a, i3 %n) {
 ; UNROLL-4-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
 ; UNROLL-4-NEXT:    [[NITER_NEXT_3]] = add i3 [[NITER]], -4
 ; UNROLL-4-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i3 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; UNROLL-4-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; UNROLL-4:       for.end.loopexit.unr-lcssa.loopexit:
+; UNROLL-4-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; UNROLL-4:       for.end.loopexit.unr-lcssa:
 ; UNROLL-4-NEXT:    [[ADD_LCSSA_PH_PH:%.*]] = phi i3 [ [[ADD_3]], [[FOR_BODY]] ]
 ; UNROLL-4-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3]], [[FOR_BODY]] ]
 ; UNROLL-4-NEXT:    [[SUM_02_UNR_PH:%.*]] = phi i3 [ [[ADD_3]], [[FOR_BODY]] ]
-; UNROLL-4-NEXT:    br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
-; UNROLL-4:       for.end.loopexit.unr-lcssa:
-; UNROLL-4-NEXT:    [[ADD_LCSSA_PH:%.*]] = phi i3 [ poison, [[FOR_BODY_PREHEADER]] ], [ [[ADD_LCSSA_PH_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; UNROLL-4-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; UNROLL-4-NEXT:    [[SUM_02_UNR:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[SUM_02_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
 ; UNROLL-4-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i3 [[XTRAITER]], 0
-; UNROLL-4-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_END_LOOPEXIT:%.*]]
+; UNROLL-4-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER]], label [[FOR_END_LOOPEXIT:%.*]]
 ; UNROLL-4:       for.body.epil.preheader:
+; UNROLL-4-NEXT:    [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
+; UNROLL-4-NEXT:    [[SUM_02_EPIL_INIT:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[SUM_02_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
+; UNROLL-4-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i3 [[XTRAITER]], 0
+; UNROLL-4-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD2]])
 ; UNROLL-4-NEXT:    br label [[FOR_BODY_EPIL:%.*]]
 ; UNROLL-4:       for.body.epil:
-; UNROLL-4-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[INDVARS_IV_UNR]], [[FOR_BODY_EPIL_PREHEADER]] ]
-; UNROLL-4-NEXT:    [[SUM_02_EPIL:%.*]] = phi i3 [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[SUM_02_UNR]], [[FOR_BODY_EPIL_PREHEADER]] ]
+; UNROLL-4-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[INDVARS_IV_EPIL_INIT]], [[FOR_BODY_EPIL_PREHEADER]] ]
+; UNROLL-4-NEXT:    [[SUM_02_EPIL:%.*]] = phi i3 [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[SUM_02_EPIL_INIT]], [[FOR_BODY_EPIL_PREHEADER]] ]
 ; UNROLL-4-NEXT:    [[EPIL_ITER:%.*]] = phi i3 [ 0, [[FOR_BODY_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[FOR_BODY_EPIL]] ]
 ; UNROLL-4-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i3, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
 ; UNROLL-4-NEXT:    [[TMP6:%.*]] = load i3, ptr [[ARRAYIDX_EPIL]], align 1
@@ -126,7 +125,7 @@ define i3 @test(ptr %a, i3 %n) {
 ; UNROLL-4-NEXT:    [[ADD_LCSSA_PH1:%.*]] = phi i3 [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ]
 ; UNROLL-4-NEXT:    br label [[FOR_END_LOOPEXIT]]
 ; UNROLL-4:       for.end.loopexit:
-; UNROLL-4-NEXT:    [[ADD_LCSSA:%.*]] = phi i3 [ [[ADD_LCSSA_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_LCSSA_PH1]], [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] ]
+; UNROLL-4-NEXT:    [[ADD_LCSSA:%.*]] = phi i3 [ [[ADD_LCSSA_PH_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_LCSSA_PH1]], [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] ]
 ; UNROLL-4-NEXT:    br label [[FOR_END]]
 ; UNROLL-4:       for.end:
 ; UNROLL-4-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i3 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_END_LOOPEXIT]] ]
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
index d3e5e0b..65ef3e4 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
@@ -19,7 +19,7 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
-; CHECK-NEXT:    br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[HEADER_EPIL_PREHEADER:%.*]], label [[ENTRY_NEW:%.*]]
 ; CHECK:       entry.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[TMP0]], -8
 ; CHECK-NEXT:    br label [[HEADER:%.*]]
@@ -94,20 +94,19 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
 ; CHECK-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
 ; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
-; CHECK:       latchexit.unr-lcssa.loopexit:
-; CHECK-NEXT:    br label [[LATCHEXIT_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[HEADER]]
 ; CHECK:       latchexit.unr-lcssa:
-; CHECK-NEXT:    [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[LATCHEXIT:%.*]], label [[HEADER_EPIL_PREHEADER:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[LATCHEXIT:%.*]], label [[HEADER_EPIL_PREHEADER]]
 ; CHECK:       header.epil.preheader:
+; CHECK-NEXT:    [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT_7]], [[LATCHEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[SUM_02_EPIL_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD3:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD3]])
 ; CHECK-NEXT:    br label [[HEADER_EPIL:%.*]]
 ; CHECK:       header.epil:
-; CHECK-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
-; CHECK-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_EPIL_INIT]], [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_EPIL_INIT]], [[HEADER_EPIL_PREHEADER]] ]
 ; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ], [ 0, [[HEADER_EPIL_PREHEADER]] ]
 ; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
 ; CHECK:       for.exiting_block.epil:
@@ -124,11 +123,11 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
 ; CHECK:       latchexit.epilog-lcssa:
 ; CHECK-NEXT:    br label [[LATCHEXIT]]
 ; CHECK:       latchexit:
-; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL]], [[LATCHEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL]], [[LATCHEXIT_EPILOG_LCSSA]] ]
 ; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
 ; CHECK:       otherexit.loopexit:
 ; CHECK-NEXT:    br label [[OTHEREXIT:%.*]]
-; CHECK:       otherexit.loopexit3:
+; CHECK:       otherexit.loopexit4:
 ; CHECK-NEXT:    br label [[OTHEREXIT]]
 ; CHECK:       otherexit:
 ; CHECK-NEXT:    [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_EPIL]], [[OTHEREXIT_LOOPEXIT3]] ]
@@ -166,7 +165,7 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], -1
 ; ENABLED-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
 ; ENABLED-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
-; ENABLED-NEXT:    br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; ENABLED-NEXT:    br i1 [[TMP2]], label [[HEADER_EPIL_PREHEADER:%.*]], label [[ENTRY_NEW:%.*]]
 ; ENABLED:       entry.new:
 ; ENABLED-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
 ; ENABLED-NEXT:    br label [[HEADER:%.*]]
@@ -248,23 +247,22 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
 ; ENABLED-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
 ; ENABLED-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; ENABLED-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
-; ENABLED:       latchexit.unr-lcssa.loopexit:
+; ENABLED-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[HEADER]]
+; ENABLED:       latchexit.unr-lcssa:
 ; ENABLED-NEXT:    [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
 ; ENABLED-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[LATCH_7]] ]
 ; ENABLED-NEXT:    [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
-; ENABLED-NEXT:    br label [[LATCHEXIT_UNR_LCSSA]]
-; ENABLED:       latchexit.unr-lcssa:
-; ENABLED-NEXT:    [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[SUM_0_LCSSA_PH_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; ENABLED-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_UNR_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; ENABLED-NEXT:    [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
 ; ENABLED-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; ENABLED-NEXT:    br i1 [[LCMP_MOD]], label [[HEADER_EPIL_PREHEADER:%.*]], label [[LATCHEXIT:%.*]]
+; ENABLED-NEXT:    br i1 [[LCMP_MOD]], label [[HEADER_EPIL_PREHEADER]], label [[LATCHEXIT:%.*]]
 ; ENABLED:       header.epil.preheader:
+; ENABLED-NEXT:    [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_UNR_PH]], [[LATCHEXIT_UNR_LCSSA]] ]
+; ENABLED-NEXT:    [[SUM_02_EPIL_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[LATCHEXIT_UNR_LCSSA]] ]
+; ENABLED-NEXT:    [[LCMP_MOD3:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; ENABLED-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD3]])
 ; ENABLED-NEXT:    br label [[HEADER_EPIL:%.*]]
 ; ENABLED:       header.epil:
-; ENABLED-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
-; ENABLED-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; ENABLED-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_EPIL_INIT]], [[HEADER_EPIL_PREHEADER]] ]
+; ENABLED-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_EPIL_INIT]], [[HEADER_EPIL_PREHEADER]] ]
 ; ENABLED-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ 0, [[HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ]
 ; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
 ; ENABLED:       for.exiting_block.epil:
@@ -283,12 +281,12 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[SUM_0_LCSSA_PH2:%.*]] = phi i32 [ [[ADD_EPIL]], [[LATCH_EPIL]] ]
 ; ENABLED-NEXT:    br label [[LATCHEXIT]]
 ; ENABLED:       latchexit:
-; ENABLED-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[SUM_0_LCSSA_PH2]], [[LATCHEXIT_EPILOG_LCSSA]] ]
+; ENABLED-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[SUM_0_LCSSA_PH2]], [[LATCHEXIT_EPILOG_LCSSA]] ]
 ; ENABLED-NEXT:    ret i32 [[SUM_0_LCSSA]]
 ; ENABLED:       otherexit.loopexit:
 ; ENABLED-NEXT:    [[SUM_02_LCSSA_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1]], [[FOR_EXITING_BLOCK_2]] ], [ [[ADD_2]], [[FOR_EXITING_BLOCK_3]] ], [ [[ADD_3]], [[FOR_EXITING_BLOCK_4]] ], [ [[ADD_4]], [[FOR_EXITING_BLOCK_5]] ], [ [[ADD_5]], [[FOR_EXITING_BLOCK_6]] ], [ [[ADD_6]], [[FOR_EXITING_BLOCK_7]] ]
 ; ENABLED-NEXT:    br label [[OTHEREXIT:%.*]]
-; ENABLED:       otherexit.loopexit3:
+; ENABLED:       otherexit.loopexit4:
 ; ENABLED-NEXT:    [[SUM_02_LCSSA_PH4:%.*]] = phi i32 [ [[SUM_02_EPIL]], [[FOR_EXITING_BLOCK_EPIL]] ]
 ; ENABLED-NEXT:    br label [[OTHEREXIT]]
 ; ENABLED:       otherexit:
@@ -380,7 +378,7 @@ define i32 @test2(ptr nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], -1
 ; ENABLED-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
 ; ENABLED-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
-; ENABLED-NEXT:    br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; ENABLED-NEXT:    br i1 [[TMP2]], label [[HEADER_EPIL_PREHEADER:%.*]], label [[ENTRY_NEW:%.*]]
 ; ENABLED:       entry.new:
 ; ENABLED-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
 ; ENABLED-NEXT:    br label [[HEADER:%.*]]
@@ -462,23 +460,22 @@ define i32 @test2(ptr nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
 ; ENABLED-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
 ; ENABLED-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; ENABLED-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
-; ENABLED:       latchexit.unr-lcssa.loopexit:
+; ENABLED-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[HEADER]]
+; ENABLED:       latchexit.unr-lcssa:
 ; ENABLED-NEXT:    [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
 ; ENABLED-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[LATCH_7]] ]
 ; ENABLED-NEXT:    [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
-; ENABLED-NEXT:    br label [[LATCHEXIT_UNR_LCSSA]]
-; ENABLED:       latchexit.unr-lcssa:
-; ENABLED-NEXT:    [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[SUM_0_LCSSA_PH_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; ENABLED-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_UNR_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; ENABLED-NEXT:    [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
 ; ENABLED-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; ENABLED-NEXT:    br i1 [[LCMP_MOD]], label [[HEADER_EPIL_PREHEADER:%.*]], label [[LATCHEXIT:%.*]]
+; ENABLED-NEXT:    br i1 [[LCMP_MOD]], label [[HEADER_EPIL_PREHEADER]], label [[LATCHEXIT:%.*]]
 ; ENABLED:       header.epil.preheader:
+; ENABLED-NEXT:    [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_UNR_PH]], [[LATCHEXIT_UNR_LCSSA]] ]
+; ENABLED-NEXT:    [[SUM_02_EPIL_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[LATCHEXIT_UNR_LCSSA]] ]
+; ENABLED-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; ENABLED-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD2]])
 ; ENABLED-NEXT:    br label [[HEADER_EPIL:%.*]]
 ; ENABLED:       header.epil:
-; ENABLED-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
-; ENABLED-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; ENABLED-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_EPIL_INIT]], [[HEADER_EPIL_PREHEADER]] ]
+; ENABLED-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_EPIL_INIT]], [[HEADER_EPIL_PREHEADER]] ]
 ; ENABLED-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ 0, [[HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ]
 ; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
 ; ENABLED:       for.exiting_block.epil:
@@ -497,12 +494,12 @@ define i32 @test2(ptr nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[SUM_0_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL]], [[LATCH_EPIL]] ]
 ; ENABLED-NEXT:    br label [[LATCHEXIT]]
 ; ENABLED:       latchexit:
-; ENABLED-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[SUM_0_LCSSA_PH1]], [[LATCHEXIT_EPILOG_LCSSA]] ]
+; ENABLED-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[SUM_0_LCSSA_PH1]], [[LATCHEXIT_EPILOG_LCSSA]] ]
 ; ENABLED-NEXT:    ret i32 [[SUM_0_LCSSA]]
 ; ENABLED:       otherexit.loopexit:
 ; ENABLED-NEXT:    [[RVAL_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1]], [[FOR_EXITING_BLOCK_2]] ], [ [[ADD_2]], [[FOR_EXITING_BLOCK_3]] ], [ [[ADD_3]], [[FOR_EXITING_BLOCK_4]] ], [ [[ADD_4]], [[FOR_EXITING_BLOCK_5]] ], [ [[ADD_5]], [[FOR_EXITING_BLOCK_6]] ], [ [[ADD_6]], [[FOR_EXITING_BLOCK_7]] ]
 ; ENABLED-NEXT:    br label [[OTHEREXIT:%.*]]
-; ENABLED:       otherexit.loopexit2:
+; ENABLED:       otherexit.loopexit3:
 ; ENABLED-NEXT:    [[RVAL_PH3:%.*]] = phi i32 [ [[SUM_02_EPIL]], [[FOR_EXITING_BLOCK_EPIL]] ]
 ; ENABLED-NEXT:    br label [[OTHEREXIT]]
 ; ENABLED:       otherexit:
@@ -747,7 +744,7 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
-; CHECK-NEXT:    br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[HEADER_EPIL_PREHEADER:%.*]], label [[ENTRY_NEW:%.*]]
 ; CHECK:       entry.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[TMP0]], -8
 ; CHECK-NEXT:    br label [[HEADER:%.*]]
@@ -822,20 +819,19 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
 ; CHECK-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
 ; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
-; CHECK:       latchexit.unr-lcssa.loopexit:
-; CHECK-NEXT:    br label [[LATCHEXIT_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[HEADER]]
 ; CHECK:       latchexit.unr-lcssa:
-; CHECK-NEXT:    [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[LATCHEXIT:%.*]], label [[HEADER_EPIL_PREHEADER:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[LATCHEXIT:%.*]], label [[HEADER_EPIL_PREHEADER]]
 ; CHECK:       header.epil.preheader:
+; CHECK-NEXT:    [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT_7]], [[LATCHEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[SUM_02_EPIL_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD3:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD3]])
 ; CHECK-NEXT:    br label [[HEADER_EPIL:%.*]]
 ; CHECK:       header.epil:
-; CHECK-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
-; CHECK-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_EPIL_INIT]], [[HEADER_EPIL_PREHEADER]] ]
+; CHECK-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_EPIL_INIT]], [[HEADER_EPIL_PREHEADER]] ]
 ; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ], [ 0, [[HEADER_EPIL_PREHEADER]] ]
 ; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
 ; CHECK:       for.exiting_block.epil:
@@ -852,11 +848,11 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
 ; CHECK:       latchexit.epilog-lcssa:
 ; CHECK-NEXT:    br label [[LATCHEXIT]]
 ; CHECK:       latchexit:
-; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL]], [[LATCHEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL]], [[LATCHEXIT_EPILOG_LCSSA]] ]
 ; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
 ; CHECK:       otherexit.loopexit:
 ; CHECK-NEXT:    br label [[OTHEREXIT:%.*]]
-; CHECK:       otherexit.loopexit3:
+; CHECK:       otherexit.loopexit4:
 ; CHECK-NEXT:    br label [[OTHEREXIT]]
 ; CHECK:       otherexit:
 ; CHECK-NEXT:    [[SUM_02_LCSSA:%.*]] = phi i32 [ [[SUM_02]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_EPIL]], [[OTHEREXIT_LOOPEXIT3]] ]
@@ -899,7 +895,7 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], -1
 ; ENABLED-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
 ; ENABLED-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 7
-; ENABLED-NEXT:    br i1 [[TMP2]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; ENABLED-NEXT:    br i1 [[TMP2]], label [[HEADER_EPIL_PREHEADER:%.*]], label [[ENTRY_NEW:%.*]]
 ; ENABLED:       entry.new:
 ; ENABLED-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
 ; ENABLED-NEXT:    br label [[HEADER:%.*]]
@@ -981,23 +977,22 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
 ; ENABLED-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
 ; ENABLED-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; ENABLED-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]]
-; ENABLED:       latchexit.unr-lcssa.loopexit:
+; ENABLED-NEXT:    br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[HEADER]]
+; ENABLED:       latchexit.unr-lcssa:
 ; ENABLED-NEXT:    [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
 ; ENABLED-NEXT:    [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[LATCH_7]] ]
 ; ENABLED-NEXT:    [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[LATCH_7]] ]
-; ENABLED-NEXT:    br label [[LATCHEXIT_UNR_LCSSA]]
-; ENABLED:       latchexit.unr-lcssa:
-; ENABLED-NEXT:    [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[SUM_0_LCSSA_PH_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; ENABLED-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_UNR_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; ENABLED-NEXT:    [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ]
 ; ENABLED-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; ENABLED-NEXT:    br i1 [[LCMP_MOD]], label [[HEADER_EPIL_PREHEADER:%.*]], label [[LATCHEXIT:%.*]]
+; ENABLED-NEXT:    br i1 [[LCMP_MOD]], label [[HEADER_EPIL_PREHEADER]], label [[LATCHEXIT:%.*]]
 ; ENABLED:       header.epil.preheader:
+; ENABLED-NEXT:    [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_UNR_PH]], [[LATCHEXIT_UNR_LCSSA]] ]
+; ENABLED-NEXT:    [[SUM_02_EPIL_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[LATCHEXIT_UNR_LCSSA]] ]
+; ENABLED-NEXT:    [[LCMP_MOD3:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; ENABLED-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD3]])
 ; ENABLED-NEXT:    br label [[HEADER_EPIL:%.*]]
 ; ENABLED:       header.epil:
-; ENABLED-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ]
-; ENABLED-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ]
+; ENABLED-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_EPIL_INIT]], [[HEADER_EPIL_PREHEADER]] ]
+; ENABLED-NEXT:    [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_EPIL_INIT]], [[HEADER_EPIL_PREHEADER]] ]
 ; ENABLED-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ 0, [[HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ]
 ; ENABLED-NEXT:    br label [[FOR_EXITING_BLOCK_EPIL:%.*]]
 ; ENABLED:       for.exiting_block.epil:
@@ -1016,13 +1011,13 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
 ; ENABLED-NEXT:    [[SUM_0_LCSSA_PH2:%.*]] = phi i32 [ [[ADD_EPIL]], [[LATCH_EPIL]] ]
 ; ENABLED-NEXT:    br label [[LATCHEXIT]]
 ; ENABLED:       latchexit:
-; ENABLED-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[SUM_0_LCSSA_PH2]], [[LATCHEXIT_EPILOG_LCSSA]] ]
+; ENABLED-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[SUM_0_LCSSA_PH2]], [[LATCHEXIT_EPILOG_LCSSA]] ]
 ; ENABLED-NEXT:    ret i32 [[SUM_0_LCSSA]]
 ; ENABLED:       otherexit.loopexit:
 ; ENABLED-NEXT:    [[SUM_02_LCSSA_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1]], [[FOR_EXITING_BLOCK_2]] ], [ [[ADD_2]], [[FOR_EXITING_BLOCK_3]] ], [ [[ADD_3]], [[FOR_EXITING_BLOCK_4]] ], [ [[ADD_4]], [[FOR_EXITING_BLOCK_5]] ], [ [[ADD_5]], [[FOR_EXITING_BLOCK_6]] ], [ [[ADD_6]], [[FOR_EXITING_BLOCK_7]] ]
 ; ENABLED-NEXT:    [[RVAL_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1]], [[FOR_EXITING_BLOCK_2]] ], [ [[ADD_2]], [[FOR_EXITING_BLOCK_3]] ], [ [[ADD_3]], [[FOR_EXITING_BLOCK_4]] ], [ [[ADD_4]], [[FOR_EXITING_BLOCK_5]] ], [ [[ADD_5]], [[FOR_EXITING_BLOCK_6]] ], [ [[ADD_6]], [[FOR_EXITING_BLOCK_7]] ]
 ; ENABLED-NEXT:    br label [[OTHEREXIT:%.*]]
-; ENABLED:       otherexit.loopexit3:
+; ENABLED:       otherexit.loopexit4:
 ; ENABLED-NEXT:    [[SUM_02_LCSSA_PH4:%.*]] = phi i32 [ [[SUM_02_EPIL]], [[FOR_EXITING_BLOCK_EPIL]] ]
 ; ENABLED-NEXT:    [[RVAL_PH5:%.*]] = phi i32 [ [[SUM_02_EPIL]], [[FOR_EXITING_BLOCK_EPIL]] ]
 ; ENABLED-NEXT:    br label [[OTHEREXIT]]
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
index 81fceb6..73f7fd3 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
@@ -91,7 +91,7 @@ define dso_local void @cannotProveDivisibleTC(ptr noalias nocapture %a, ptr noal
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[N]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 1
-; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
 ; CHECK:       for.body.preheader.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[N]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
@@ -112,15 +112,15 @@ define dso_local void @cannotProveDivisibleTC(ptr noalias nocapture %a, ptr noal
 ; CHECK-NEXT:    [[INC_1]] = add nuw nsw i32 [[I_011]], 2
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i32 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp ne i32 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
-; CHECK:       exit.loopexit.unr-lcssa.loopexit:
-; CHECK-NEXT:    [[I_011_UNR_PH:%.*]] = phi i32 [ [[INC_1]], [[FOR_BODY]] ]
-; CHECK-NEXT:    br label [[EXIT_LOOPEXIT_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       exit.loopexit.unr-lcssa:
-; CHECK-NEXT:    [[I_011_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[I_011_UNR_PH]], [[EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[I_011_UNR1:%.*]] = phi i32 [ [[INC_1]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER]], label [[EXIT_LOOPEXIT:%.*]]
 ; CHECK:       for.body.epil.preheader:
+; CHECK-NEXT:    [[I_011_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[I_011_UNR1]], [[EXIT_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label [[FOR_BODY_EPIL:%.*]]
 ; CHECK:       for.body.epil:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[I_011_UNR]]
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll
index 0b9c6ac..a5ac2cf4 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll
@@ -8,7 +8,7 @@ define i32 @test_add_reduction(ptr %a, i64 %n) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
 ; CHECK:       [[ENTRY_NEW]]:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
@@ -27,28 +27,27 @@ define i32 @test_add_reduction(ptr %a, i64 %n) {
 ; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
-; CHECK-NEXT:    [[RES_PH_PH:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
-; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
-; CHECK-NEXT:    [[RDX_UNR_PH:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
-; CHECK-NEXT:    [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]]
-; CHECK-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[EXIT_UNR_LCSSA]]:
-; CHECK-NEXT:    [[RES_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[RES_PH:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
+; CHECK-NEXT:    [[RDX_UNR1:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
 ; CHECK:       [[LOOP_EPIL_PREHEADER]]:
+; CHECK-NEXT:    [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD2]])
 ; CHECK-NEXT:    br label %[[LOOP_EPIL:.*]]
 ; CHECK:       [[LOOP_EPIL]]:
-; CHECK-NEXT:    [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_UNR]]
+; CHECK-NEXT:    [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL_INIT]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[GEP_A_EPIL]], align 2
 ; CHECK-NEXT:    [[RDX_NEXT_EPIL:%.*]] = add nuw nsw i32 [[RDX_UNR]], [[TMP4]]
 ; CHECK-NEXT:    br label %[[EXIT]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
 entry:
@@ -76,7 +75,7 @@ define i32 @test_add_reduction_constant_op(ptr %a, i64 %n) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
 ; CHECK:       [[ENTRY_NEW]]:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
@@ -88,17 +87,16 @@ define i32 @test_add_reduction_constant_op(ptr %a, i64 %n) {
 ; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
-; CHECK:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
-; CHECK-NEXT:    [[RES_PH_PH:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
-; CHECK-NEXT:    [[RDX_UNR_PH:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
-; CHECK-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       [[EXIT_UNR_LCSSA]]:
-; CHECK-NEXT:    [[RES_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[RES_PH:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
+; CHECK-NEXT:    [[RDX_UNR1:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
 ; CHECK:       [[LOOP_EPIL_PREHEADER]]:
+; CHECK-NEXT:    [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_UNR1]], %[[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD2]])
 ; CHECK-NEXT:    br label %[[LOOP_EPIL:.*]]
 ; CHECK:       [[LOOP_EPIL]]:
 ; CHECK-NEXT:    [[RDX_NEXT_EPIL:%.*]] = add nuw nsw i32 [[RDX_UNR]], 1
@@ -130,7 +128,7 @@ define i32 @test_add_reduction_8x_unroll(ptr %a, i64 %n) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 7
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
 ; CHECK:       [[ENTRY_NEW]]:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
@@ -172,23 +170,22 @@ define i32 @test_add_reduction_8x_unroll(ptr %a, i64 %n) {
 ; CHECK-NEXT:    [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
 ; CHECK-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
 ; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
-; CHECK-NEXT:    [[RES_PH_PH:%.*]] = phi i32 [ [[RDX_NEXT_7]], %[[LOOP]] ]
-; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_7]], %[[LOOP]] ]
-; CHECK-NEXT:    [[RDX_UNR_PH:%.*]] = phi i32 [ [[RDX_NEXT_7]], %[[LOOP]] ]
-; CHECK-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       [[EXIT_UNR_LCSSA]]:
-; CHECK-NEXT:    [[RES_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[RES_PH:%.*]] = phi i32 [ [[RDX_NEXT_7]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_7]], %[[LOOP]] ]
+; CHECK-NEXT:    [[RDX_UNR:%.*]] = phi i32 [ [[RDX_NEXT_7]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
 ; CHECK:       [[LOOP_EPIL_PREHEADER]]:
+; CHECK-NEXT:    [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[RDX_EPIL_INIT:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_UNR]], %[[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD2]])
 ; CHECK-NEXT:    br label %[[LOOP_EPIL:.*]]
 ; CHECK:       [[LOOP_EPIL]]:
-; CHECK-NEXT:    [[IV_EPIL:%.*]] = phi i64 [ [[IV_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
-; CHECK-NEXT:    [[RDX_EPIL:%.*]] = phi i32 [ [[RDX_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
+; CHECK-NEXT:    [[IV_EPIL:%.*]] = phi i64 [ [[IV_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
+; CHECK-NEXT:    [[RDX_EPIL:%.*]] = phi i32 [ [[RDX_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
 ; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_EPIL]] ]
 ; CHECK-NEXT:    [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[GEP_A_EPIL]], align 2
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
index a3cfeac..5f4bbf1 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
@@ -11,31 +11,30 @@ define i32 @unroll(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N)
 ; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 3
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[N]], 4
-; CHECK-NEXT:    br i1 [[TMP0]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]]
 ; CHECK:       for.body.lr.ph.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.cond.cleanup.loopexit.unr-lcssa.loopexit:
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]]
 ; CHECK:       for.cond.cleanup.loopexit.unr-lcssa:
-; CHECK-NEXT:    [[ADD_LCSSA_PH:%.*]] = phi i32 [ poison, [[FOR_BODY_LR_PH]] ], [ [[ADD_3:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]] ]
-; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[C_010_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[ADD_3]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
 ; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY_EPIL_PREHEADER:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY_EPIL_PREHEADER]]
 ; CHECK:       for.body.epil.preheader:
+; CHECK-NEXT:    [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]] ]
+; CHECK-NEXT:    [[C_010_EPIL_INIT:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[ADD_3:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD2]])
 ; CHECK-NEXT:    br label [[FOR_BODY_EPIL:%.*]]
 ; CHECK:       for.body.epil:
-; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV_UNR]]
+; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV_EPIL_INIT]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2_EPIL:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV_UNR]]
+; CHECK-NEXT:    [[ARRAYIDX2_EPIL:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV_EPIL_INIT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2_EPIL]], align 4
 ; CHECK-NEXT:    [[MUL_EPIL:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
-; CHECK-NEXT:    [[ADD_EPIL:%.*]] = add nsw i32 [[MUL_EPIL]], [[C_010_UNR]]
+; CHECK-NEXT:    [[ADD_EPIL:%.*]] = add nsw i32 [[MUL_EPIL]], [[C_010_EPIL_INIT]]
 ; CHECK-NEXT:    [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 1
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA:%.*]], label [[FOR_BODY_EPIL_1:%.*]]
 ; CHECK:       for.body.epil.1:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 1
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL_INIT]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_EPIL]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_1]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV_NEXT_EPIL]]
@@ -45,7 +44,7 @@ define i32 @unroll(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N)
 ; CHECK-NEXT:    [[EPIL_ITER_CMP_1_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 2
 ; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]], label [[FOR_BODY_EPIL_2:%.*]]
 ; CHECK:       for.body.epil.2:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 2
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_EPIL_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_EPIL_INIT]], 2
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_2]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
@@ -57,7 +56,7 @@ define i32 @unroll(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N)
 ; CHECK-NEXT:    [[ADD_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ], [ [[ADD_EPIL_1]], [[FOR_BODY_EPIL_1]] ], [ [[ADD_EPIL_2]], [[FOR_BODY_EPIL_2]] ]
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP_LOOPEXIT]]
 ; CHECK:       for.cond.cleanup.loopexit:
-; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_LCSSA_PH1]], [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ]
+; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD_3]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_LCSSA_PH1]], [[FOR_COND_CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ]
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    [[C_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
@@ -96,7 +95,7 @@ define i32 @unroll(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N)
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
 ; CHECK-NEXT:    [[NITER_NEXT_3]] = add i64 [[NITER]], 4
 ; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ;
 entry:
   %cmp9 = icmp eq i32 %N, 0
diff --git a/llvm/test/Transforms/LoopUnroll/scev-invalidation-lcssa.ll b/llvm/test/Transforms/LoopUnroll/scev-invalidation-lcssa.ll
index 0a3d201..fd07238 100644
--- a/llvm/test/Transforms/LoopUnroll/scev-invalidation-lcssa.ll
+++ b/llvm/test/Transforms/LoopUnroll/scev-invalidation-lcssa.ll
@@ -30,7 +30,7 @@ define i32 @f(i1 %cond1) #0 !prof !0 {
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[LD_LCSSA]], 1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[LD_LCSSA]], 7
-; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT2_UNR_LCSSA:%.*]], label [[ENTRY2_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[LOOP2_EPIL_PREHEADER:%.*]], label [[ENTRY2_NEW:%.*]]
 ; CHECK:       entry2.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[LOOP2:%.*]]
@@ -40,18 +40,18 @@ define i32 @f(i1 %cond1) #0 !prof !0 {
 ; CHECK-NEXT:    [[INC_7]] = add i64 [[PHI]], 8
 ; CHECK-NEXT:    [[NITER_NEXT_7]] = add i64 [[NITER]], 8
 ; CHECK-NEXT:    [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[EXIT2_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP2]]
-; CHECK:       exit2.unr-lcssa.loopexit:
-; CHECK-NEXT:    [[PHI_UNR_PH:%.*]] = phi i64 [ [[INC_7]], [[LOOP2]] ]
-; CHECK-NEXT:    br label [[EXIT2_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_7]], label [[EXIT2_UNR_LCSSA:%.*]], label [[LOOP2]]
 ; CHECK:       exit2.unr-lcssa:
-; CHECK-NEXT:    [[PHI_UNR:%.*]] = phi i64 [ 0, [[ENTRY2]] ], [ [[PHI_UNR_PH]], [[EXIT2_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[PHI_UNR:%.*]] = phi i64 [ [[INC_7]], [[LOOP2]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP2_EPIL_PREHEADER:%.*]], label [[EXIT2:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP2_EPIL_PREHEADER]], label [[EXIT2:%.*]]
 ; CHECK:       loop2.epil.preheader:
+; CHECK-NEXT:    [[PHI_EPIL_INIT:%.*]] = phi i64 [ 0, [[ENTRY2]] ], [ [[PHI_UNR]], [[EXIT2_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD2]])
 ; CHECK-NEXT:    br label [[LOOP2_EPIL:%.*]]
 ; CHECK:       loop2.epil:
-; CHECK-NEXT:    [[PHI_EPIL:%.*]] = phi i64 [ [[PHI_UNR]], [[LOOP2_EPIL_PREHEADER]] ], [ [[INC_EPIL:%.*]], [[LOOP2_EPIL]] ]
+; CHECK-NEXT:    [[PHI_EPIL:%.*]] = phi i64 [ [[PHI_EPIL_INIT]], [[LOOP2_EPIL_PREHEADER]] ], [ [[INC_EPIL:%.*]], [[LOOP2_EPIL]] ]
 ; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ 0, [[LOOP2_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[LOOP2_EPIL]] ]
 ; CHECK-NEXT:    [[INC_EPIL]] = add i64 [[PHI_EPIL]], 1
 ; CHECK-NEXT:    [[COND2_EPIL:%.*]] = icmp eq i64 [[LD_LCSSA]], [[PHI_EPIL]]
diff --git a/llvm/test/Transforms/LoopUnroll/tripcount-overflow.ll b/llvm/test/Transforms/LoopUnroll/tripcount-overflow.ll
index 1481286..f839c88 100644
--- a/llvm/test/Transforms/LoopUnroll/tripcount-overflow.ll
+++ b/llvm/test/Transforms/LoopUnroll/tripcount-overflow.ll
@@ -17,7 +17,7 @@ define i32 @foo(i32 %N) {
 ; EPILOG-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], 1
 ; EPILOG-NEXT:    [[XTRAITER:%.*]] = and i32 [[TMP0]], 1
 ; EPILOG-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[N]], 1
-; EPILOG-NEXT:    br i1 [[TMP1]], label [[WHILE_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; EPILOG-NEXT:    br i1 [[TMP1]], label [[WHILE_BODY_EPIL_PREHEADER:%.*]], label [[ENTRY_NEW:%.*]]
 ; EPILOG:       entry.new:
 ; EPILOG-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[TMP0]], [[XTRAITER]]
 ; EPILOG-NEXT:    br label [[WHILE_BODY:%.*]]
@@ -28,22 +28,21 @@ define i32 @foo(i32 %N) {
 ; EPILOG-NEXT:    [[INC_1]] = add i32 [[I]], 2
 ; EPILOG-NEXT:    [[NITER_NEXT_1]] = add i32 [[NITER]], 2
 ; EPILOG-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i32 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; EPILOG-NEXT:    br i1 [[NITER_NCMP_1]], label [[WHILE_END_UNR_LCSSA_LOOPEXIT:%.*]], label [[WHILE_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; EPILOG:       while.end.unr-lcssa.loopexit:
+; EPILOG-NEXT:    br i1 [[NITER_NCMP_1]], label [[WHILE_END_UNR_LCSSA:%.*]], label [[WHILE_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; EPILOG:       while.end.unr-lcssa:
 ; EPILOG-NEXT:    [[I_LCSSA_PH_PH:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
 ; EPILOG-NEXT:    [[I_UNR_PH:%.*]] = phi i32 [ [[INC_1]], [[WHILE_BODY]] ]
-; EPILOG-NEXT:    br label [[WHILE_END_UNR_LCSSA]]
-; EPILOG:       while.end.unr-lcssa:
-; EPILOG-NEXT:    [[I_LCSSA_PH:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[I_LCSSA_PH_PH]], [[WHILE_END_UNR_LCSSA_LOOPEXIT]] ]
-; EPILOG-NEXT:    [[I_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[I_UNR_PH]], [[WHILE_END_UNR_LCSSA_LOOPEXIT]] ]
 ; EPILOG-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; EPILOG-NEXT:    br i1 [[LCMP_MOD]], label [[WHILE_BODY_EPIL_PREHEADER:%.*]], label [[WHILE_END:%.*]]
+; EPILOG-NEXT:    br i1 [[LCMP_MOD]], label [[WHILE_BODY_EPIL_PREHEADER]], label [[WHILE_END:%.*]]
 ; EPILOG:       while.body.epil.preheader:
+; EPILOG-NEXT:    [[I_EPIL_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_UNR_PH]], [[WHILE_END_UNR_LCSSA]] ]
+; EPILOG-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; EPILOG-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD2]])
 ; EPILOG-NEXT:    br label [[WHILE_BODY_EPIL:%.*]]
 ; EPILOG:       while.body.epil:
 ; EPILOG-NEXT:    br label [[WHILE_END]]
 ; EPILOG:       while.end:
-; EPILOG-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I_LCSSA_PH]], [[WHILE_END_UNR_LCSSA]] ], [ [[I_UNR]], [[WHILE_BODY_EPIL]] ]
+; EPILOG-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I_LCSSA_PH_PH]], [[WHILE_END_UNR_LCSSA]] ], [ [[I_EPIL_INIT]], [[WHILE_BODY_EPIL]] ]
 ; EPILOG-NEXT:    ret i32 [[I_LCSSA]]
 ;
 ; PROLOG-LABEL: @foo(
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
index 20a247f..611ee5f 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
@@ -8,7 +8,7 @@
 ; CHECK:   %mul.1 = mul
 ; CHECK:   %mul.2 = mul
 ; CHECK:   %mul.3 = mul
-; CHECK:   br i1 %niter.ncmp.7, label %loop.end.unr-lcssa.loopexit, label %loop, !prof [[PROF0:![0-9]+]]
+; CHECK:   br i1 %niter.ncmp.7, label %loop.end.unr-lcssa, label %loop, !prof [[PROF0:![0-9]+]]
 ; CHECK: loop.epil:
 ; CHECK:   br i1 %epil.iter.cmp, label %loop.epil, label %loop.end.epilog-lcssa, !prof [[PROF1:![0-9]+]], !llvm.loop {{![0-9]+}}
 define i32 @bar_prof(ptr noalias nocapture readonly %src, i64 %c) !prof !1 {
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-loads-cse.ll b/llvm/test/Transforms/LoopUnroll/unroll-loads-cse.ll
index d410525..f85aac7 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-loads-cse.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-loads-cse.ll
@@ -12,7 +12,7 @@ define void @cse_matching_load_from_previous_unrolled_iteration(ptr %src, ptr no
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
-; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[ENTRY_NEW:%.*]]
 ; CHECK:       entry.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
@@ -35,15 +35,15 @@ define void @cse_matching_load_from_previous_unrolled_iteration(ptr %src, ptr no
 ; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK:       exit.unr-lcssa.loopexit:
-; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
-; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       exit.unr-lcssa:
-; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[IV_UNR1:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER]], label [[EXIT:%.*]]
 ; CHECK:       loop.epil.preheader:
+; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR1]], [[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label [[LOOP_EPIL:%.*]]
 ; CHECK:       loop.epil:
 ; CHECK-NEXT:    [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
@@ -88,7 +88,7 @@ define void @cse_different_load_types(ptr %src, ptr noalias %dst, i64 %N) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
-; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[ENTRY_NEW:%.*]]
 ; CHECK:       entry.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
@@ -115,15 +115,15 @@ define void @cse_different_load_types(ptr %src, ptr noalias %dst, i64 %N) {
 ; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK:       exit.unr-lcssa.loopexit:
-; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
-; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       exit.unr-lcssa:
-; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[IV_UNR1:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER]], label [[EXIT:%.*]]
 ; CHECK:       loop.epil.preheader:
+; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR1]], [[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label [[LOOP_EPIL:%.*]]
 ; CHECK:       loop.epil:
 ; CHECK-NEXT:    [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
@@ -170,7 +170,7 @@ define void @cse_volatile_loads(ptr %src, ptr noalias %dst, i64 %N) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
-; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[ENTRY_NEW:%.*]]
 ; CHECK:       entry.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
@@ -195,15 +195,15 @@ define void @cse_volatile_loads(ptr %src, ptr noalias %dst, i64 %N) {
 ; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK:       exit.unr-lcssa.loopexit:
-; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
-; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       exit.unr-lcssa:
-; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[IV_UNR1:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER]], label [[EXIT:%.*]]
 ; CHECK:       loop.epil.preheader:
+; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR1]], [[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label [[LOOP_EPIL:%.*]]
 ; CHECK:       loop.epil:
 ; CHECK-NEXT:    [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
@@ -248,7 +248,7 @@ define void @cse_atomic_loads(ptr %src, ptr noalias %dst, i64 %N) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
-; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[ENTRY_NEW:%.*]]
 ; CHECK:       entry.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
@@ -273,15 +273,15 @@ define void @cse_atomic_loads(ptr %src, ptr noalias %dst, i64 %N) {
 ; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
-; CHECK:       exit.unr-lcssa.loopexit:
-; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
-; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       exit.unr-lcssa:
-; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[IV_UNR1:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER]], label [[EXIT:%.*]]
 ; CHECK:       loop.epil.preheader:
+; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR1]], [[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label [[LOOP_EPIL:%.*]]
 ; CHECK:       loop.epil:
 ; CHECK-NEXT:    [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
@@ -326,7 +326,7 @@ define void @cse_load_may_be_clobbered(ptr %src, ptr %dst, i64 %N) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
-; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[ENTRY_NEW:%.*]]
 ; CHECK:       entry.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
@@ -351,15 +351,15 @@ define void @cse_load_may_be_clobbered(ptr %src, ptr %dst, i64 %N) {
 ; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
-; CHECK:       exit.unr-lcssa.loopexit:
-; CHECK-NEXT:    [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
-; CHECK-NEXT:    br label [[EXIT_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_UNR_LCSSA:%.*]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       exit.unr-lcssa:
-; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[IV_UNR1:%.*]] = phi i64 [ [[IV_NEXT_1]], [[LOOP]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[LOOP_EPIL_PREHEADER]], label [[EXIT:%.*]]
 ; CHECK:       loop.epil.preheader:
+; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_UNR1]], [[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label [[LOOP_EPIL:%.*]]
 ; CHECK:       loop.epil:
 ; CHECK-NEXT:    [[GEP_SRC_12_EPIL:%.*]] = getelementptr i64, ptr [[SRC_12]], i64 [[IV_UNR]]
diff --git a/llvm/test/Transforms/LoopUnrollAndJam/dependencies_visit_order.ll b/llvm/test/Transforms/LoopUnrollAndJam/dependencies_visit_order.ll
index f1a5adf..3510650 100644
--- a/llvm/test/Transforms/LoopUnrollAndJam/dependencies_visit_order.ll
+++ b/llvm/test/Transforms/LoopUnrollAndJam/dependencies_visit_order.ll
@@ -6,7 +6,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 define void @test1() {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    br i1 false, label [[BB1_BB43_CRIT_EDGE_UNR_LCSSA:%.*]], label [[BB_NEW:%.*]]
+; CHECK-NEXT:    br i1 false, label [[BB5_PREHEADER_EPIL_PREHEADER:%.*]], label [[BB_NEW:%.*]]
 ; CHECK:       bb.new:
 ; CHECK-NEXT:    br label [[BB5_PREHEADER:%.*]]
 ; CHECK:       bb5.preheader:
@@ -30,17 +30,16 @@ define void @test1() {
 ; CHECK-NEXT:    br i1 true, label [[BB38]], label [[BB10_PREHEADER]]
 ; CHECK:       bb38:
 ; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i16 [[NITER_NEXT_3]], -28
-; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[BB1_BB43_CRIT_EDGE_UNR_LCSSA_LOOPEXIT:%.*]], label [[BB5_PREHEADER]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK:       bb1.bb43_crit_edge.unr-lcssa.loopexit:
-; CHECK-NEXT:    [[I10_UNR_PH:%.*]] = phi i16 [ [[I42_3]], [[BB38]] ]
-; CHECK-NEXT:    br label [[BB1_BB43_CRIT_EDGE_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label [[BB1_BB43_CRIT_EDGE_UNR_LCSSA:%.*]], label [[BB5_PREHEADER]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       bb1.bb43_crit_edge.unr-lcssa:
-; CHECK-NEXT:    [[I10_UNR:%.*]] = phi i16 [ 0, [[BB:%.*]] ], [ [[I10_UNR_PH]], [[BB1_BB43_CRIT_EDGE_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    br i1 true, label [[BB5_PREHEADER_EPIL_PREHEADER:%.*]], label [[BB1_BB43_CRIT_EDGE:%.*]]
+; CHECK-NEXT:    [[I10_UNR:%.*]] = phi i16 [ [[I42_3]], [[BB38]] ]
+; CHECK-NEXT:    br i1 true, label [[BB5_PREHEADER_EPIL_PREHEADER]], label [[BB1_BB43_CRIT_EDGE:%.*]]
 ; CHECK:       bb5.preheader.epil.preheader:
+; CHECK-NEXT:    [[I10_EPIL_INIT:%.*]] = phi i16 [ 0, [[BB:%.*]] ], [ [[I10_UNR]], [[BB1_BB43_CRIT_EDGE_UNR_LCSSA]] ]
+; CHECK-NEXT:    call void @llvm.assume(i1 true)
 ; CHECK-NEXT:    br label [[BB5_PREHEADER_EPIL:%.*]]
 ; CHECK:       bb5.preheader.epil:
-; CHECK-NEXT:    [[I10_EPIL:%.*]] = phi i16 [ [[I10_UNR]], [[BB5_PREHEADER_EPIL_PREHEADER]] ], [ [[I42_EPIL:%.*]], [[BB38_EPIL:%.*]] ]
+; CHECK-NEXT:    [[I10_EPIL:%.*]] = phi i16 [ [[I10_EPIL_INIT]], [[BB5_PREHEADER_EPIL_PREHEADER]] ], [ [[I42_EPIL:%.*]], [[BB38_EPIL:%.*]] ]
 ; CHECK-NEXT:    [[EPIL_ITER:%.*]] = phi i16 [ 0, [[BB5_PREHEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[BB38_EPIL]] ]
 ; CHECK-NEXT:    br label [[BB10_PREHEADER_EPIL:%.*]]
 ; CHECK:       bb10.preheader.epil:
diff --git a/llvm/test/Transforms/LoopUnrollAndJam/followup.ll b/llvm/test/Transforms/LoopUnrollAndJam/followup.ll
index 5186f77..c8be48bf 100644
--- a/llvm/test/Transforms/LoopUnrollAndJam/followup.ll
+++ b/llvm/test/Transforms/LoopUnrollAndJam/followup.ll
@@ -52,7 +52,7 @@ for.end:
 
 
 ; CHECK: br i1 %exitcond.3, label %for.latch, label %for.inner, !llvm.loop ![[LOOP_INNER:[0-9]+]]
-; CHECK: br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa.loopexit, label %for.outer, !llvm.loop ![[LOOP_OUTER:[0-9]+]]
+; CHECK: br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa, label %for.outer, !llvm.loop ![[LOOP_OUTER:[0-9]+]]
 ; CHECK: br i1 %exitcond.epil, label %for.latch.epil, label %for.inner.epil, !llvm.loop ![[LOOP_REMAINDER_INNER:[0-9]+]]
 ; CHECK: br i1 %exitcond.epil.1, label %for.latch.epil.1, label %for.inner.epil.1, !llvm.loop ![[LOOP_REMAINDER_INNER]]
 ; CHECK: br i1 %exitcond.epil.2, label %for.latch.epil.2, label %for.inner.epil.2, !llvm.loop ![[LOOP_REMAINDER_INNER]]
diff --git a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
index 6f48c41..9ee51cf 100644
--- a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
+++ b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
@@ -17,7 +17,7 @@ define void @test1(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocaptu
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[I]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[I]], 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER_PREHEADER_NEW:.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_OUTER_PREHEADER_NEW:.*]]
 ; CHECK:       [[FOR_OUTER_PREHEADER_NEW]]:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]]
 ; CHECK-NEXT:    br label %[[FOR_OUTER:.*]]
@@ -71,15 +71,15 @@ define void @test1(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocaptu
 ; CHECK-NEXT:    [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_2]]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX6_3]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK:       [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]]:
-; CHECK-NEXT:    [[I_UNR_PH:%.*]] = phi i32 [ [[ADD8_3]], %[[FOR_LATCH]] ]
-; CHECK-NEXT:    br label %[[FOR_END_LOOPEXIT_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[FOR_END_LOOPEXIT_UNR_LCSSA]]:
-; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[I_UNR1:%.*]] = phi i32 [ [[ADD8_3]], %[[FOR_LATCH]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_END_LOOPEXIT:.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER]], label %[[FOR_END_LOOPEXIT:.*]]
 ; CHECK:       [[FOR_OUTER_EPIL_PREHEADER]]:
+; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER]] ], [ [[I_UNR1]], %[[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label %[[FOR_OUTER_EPIL:.*]]
 ; CHECK:       [[FOR_OUTER_EPIL]]:
 ; CHECK-NEXT:    br label %[[FOR_INNER_EPIL:.*]]
@@ -193,7 +193,7 @@ define void @test2(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocaptu
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[I]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[I]], 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[FOR_END10_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER_PREHEADER_NEW:.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_OUTER_PREHEADER_NEW:.*]]
 ; CHECK:       [[FOR_OUTER_PREHEADER_NEW]]:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]]
 ; CHECK-NEXT:    br label %[[FOR_OUTER:.*]]
@@ -251,15 +251,15 @@ define void @test2(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocaptu
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_END10_LOOPEXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP6:![0-9]+]]
-; CHECK:       [[FOR_END10_LOOPEXIT_UNR_LCSSA_LOOPEXIT]]:
-; CHECK-NEXT:    [[I_UNR_PH:%.*]] = phi i32 [ [[ADD9_3]], %[[FOR_LATCH]] ]
-; CHECK-NEXT:    br label %[[FOR_END10_LOOPEXIT_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_END10_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       [[FOR_END10_LOOPEXIT_UNR_LCSSA]]:
-; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], %[[FOR_END10_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[I_UNR1:%.*]] = phi i32 [ [[ADD9_3]], %[[FOR_LATCH]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_END10_LOOPEXIT:.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER]], label %[[FOR_END10_LOOPEXIT:.*]]
 ; CHECK:       [[FOR_OUTER_EPIL_PREHEADER]]:
+; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER]] ], [ [[I_UNR1]], %[[FOR_END10_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label %[[FOR_OUTER_EPIL:.*]]
 ; CHECK:       [[FOR_OUTER_EPIL]]:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_UNR]]
@@ -615,7 +615,7 @@ define i32 @test6() #0 {
 ; CHECK-LABEL: define i32 @test6() {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[F_PROMOTED10:%.*]] = load i32, ptr @f, align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    br i1 false, label %[[FOR_END_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; CHECK-NEXT:    br i1 false, label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
 ; CHECK:       [[ENTRY_NEW]]:
 ; CHECK-NEXT:    br label %[[FOR_OUTER:.*]]
 ; CHECK:       [[FOR_OUTER]]:
@@ -636,18 +636,15 @@ define i32 @test6() #0 {
 ; CHECK-NEXT:    [[EXITCOND_3:%.*]] = icmp ne i32 [[INC_3]], 7
 ; CHECK-NEXT:    br i1 [[EXITCOND_3]], label %[[FOR_INNER]], label %[[FOR_LATCH]]
 ; CHECK:       [[FOR_LATCH]]:
-; CHECK-NEXT:    br i1 false, label %[[FOR_OUTER]], label %[[FOR_END_UNR_LCSSA_LOOPEXIT:.*]], !llvm.loop [[LOOP7:![0-9]+]]
-; CHECK:       [[FOR_END_UNR_LCSSA_LOOPEXIT]]:
+; CHECK-NEXT:    br i1 false, label %[[FOR_OUTER]], label %[[FOR_END_UNR_LCSSA:.*]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       [[FOR_END_UNR_LCSSA]]:
 ; CHECK-NEXT:    [[DOTLCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 2, %[[FOR_LATCH]] ]
 ; CHECK-NEXT:    [[INC_LCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 7, %[[FOR_LATCH]] ]
 ; CHECK-NEXT:    [[P0_UNR_PH:%.*]] = phi i32 [ 2, %[[FOR_LATCH]] ]
-; CHECK-NEXT:    br label %[[FOR_END_UNR_LCSSA]]
-; CHECK:       [[FOR_END_UNR_LCSSA]]:
-; CHECK-NEXT:    [[DOTLCSSA_LCSSA_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[DOTLCSSA_LCSSA_PH_PH]], %[[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[INC_LCSSA_LCSSA_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[INC_LCSSA_LCSSA_PH_PH]], %[[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[P0_UNR:%.*]] = phi i32 [ [[F_PROMOTED10]], %[[ENTRY]] ], [ [[P0_UNR_PH]], %[[FOR_END_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    br i1 true, label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_END:.*]]
+; CHECK-NEXT:    br i1 true, label %[[FOR_OUTER_EPIL_PREHEADER]], label %[[FOR_END:.*]]
 ; CHECK:       [[FOR_OUTER_EPIL_PREHEADER]]:
+; CHECK-NEXT:    [[P0_UNR:%.*]] = phi i32 [ [[F_PROMOTED10]], %[[ENTRY]] ], [ [[P0_UNR_PH]], %[[FOR_END_UNR_LCSSA]] ]
+; CHECK-NEXT:    call void @llvm.assume(i1 true)
 ; CHECK-NEXT:    br label %[[FOR_OUTER_EPIL:.*]]
 ; CHECK:       [[FOR_OUTER_EPIL]]:
 ; CHECK-NEXT:    br label %[[FOR_INNER_EPIL:.*]]
@@ -661,8 +658,8 @@ define i32 @test6() #0 {
 ; CHECK-NEXT:    [[DOTLCSSA_EPIL:%.*]] = phi i32 [ [[P1_EPIL]], %[[FOR_INNER_EPIL]] ]
 ; CHECK-NEXT:    br label %[[FOR_END]]
 ; CHECK:       [[FOR_END]]:
-; CHECK-NEXT:    [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA_LCSSA_PH]], %[[FOR_END_UNR_LCSSA]] ], [ [[DOTLCSSA_EPIL]], %[[FOR_LATCH_EPIL]] ]
-; CHECK-NEXT:    [[INC_LCSSA_LCSSA:%.*]] = phi i32 [ [[INC_LCSSA_LCSSA_PH]], %[[FOR_END_UNR_LCSSA]] ], [ 7, %[[FOR_LATCH_EPIL]] ]
+; CHECK-NEXT:    [[DOTLCSSA_LCSSA:%.*]] = phi i32 [ [[DOTLCSSA_LCSSA_PH_PH]], %[[FOR_END_UNR_LCSSA]] ], [ [[DOTLCSSA_EPIL]], %[[FOR_LATCH_EPIL]] ]
+; CHECK-NEXT:    [[INC_LCSSA_LCSSA:%.*]] = phi i32 [ [[INC_LCSSA_LCSSA_PH_PH]], %[[FOR_END_UNR_LCSSA]] ], [ 7, %[[FOR_LATCH_EPIL]] ]
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
@@ -708,7 +705,7 @@ define void @test7(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocaptu
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[I]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[I]], 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_PREHEADER_NEW:.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_PREHEADER_NEW:.*]]
 ; CHECK:       [[FOR_PREHEADER_NEW]]:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]]
 ; CHECK-NEXT:    br label %[[FOR_OUTER:.*]]
@@ -747,7 +744,7 @@ define void @test7(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocaptu
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       [[FOR_INNER]]:
 ; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9]], %[[FOR_INNER]] ]
 ; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10:%.*]], %[[FOR_INNER]] ]
@@ -775,14 +772,14 @@ define void @test7(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocaptu
 ; CHECK-NEXT:    [[ADD10_3]] = add nuw i32 [[J_3]], 1
 ; CHECK-NEXT:    [[EXITCOND_3:%.*]] = icmp eq i32 [[ADD10_3]], [[E]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_3]], label %[[FOR_LATCH]], label %[[FOR_INNER]]
-; CHECK:       [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]]:
-; CHECK-NEXT:    [[I_UNR_PH:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_LATCH]] ]
-; CHECK-NEXT:    br label %[[FOR_END_LOOPEXIT_UNR_LCSSA]]
 ; CHECK:       [[FOR_END_LOOPEXIT_UNR_LCSSA]]:
-; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_PREHEADER]] ], [ [[I_UNR_PH]], %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[I_UNR1:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_LATCH]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_END_LOOPEXIT:.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER]], label %[[FOR_END_LOOPEXIT:.*]]
 ; CHECK:       [[FOR_OUTER_EPIL_PREHEADER]]:
+; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_PREHEADER]] ], [ [[I_UNR1]], %[[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label %[[FOR_OUTER_EPIL:.*]]
 ; CHECK:       [[FOR_OUTER_EPIL]]:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_UNR]]
@@ -907,7 +904,7 @@ define void @test8(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocaptu
 ; CHECK-NEXT:    [[X_038:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_CLEANUP:.*]] ], [ 0, %[[FOR_PREHEADER]] ]
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[I]], 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[FOR_CLEANUP_UNR_LCSSA:.*]], label %[[FOR_OUTEST_NEW:.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_OUTEST_NEW:.*]]
 ; CHECK:       [[FOR_OUTEST_NEW]]:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]]
 ; CHECK-NEXT:    br label %[[FOR_OUTER:.*]]
@@ -922,10 +919,10 @@ define void @test8(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocaptu
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD]]
 ; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i32 [[I]], 2
-; CHECK-NEXT:    [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_1]]
-; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX6_1]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_1]]
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_1]]
+; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX_4]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    [[ADD_2:%.*]] = add nuw nsw i32 [[I]], 3
 ; CHECK-NEXT:    [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_2]]
 ; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX6_2]], align 4, !tbaa [[INT_TBAA0]]
@@ -970,18 +967,18 @@ define void @test8(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocaptu
 ; CHECK-NEXT:    [[ADD9_LCSSA_3:%.*]] = phi i32 [ [[ADD9_3]], %[[FOR_INNER]] ]
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_1]], ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    store i32 [[ADD9_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT:    store i32 [[ADD9_LCSSA_2]], ptr [[ARRAYIDX_4]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_CLEANUP_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP9:![0-9]+]]
-; CHECK:       [[FOR_CLEANUP_UNR_LCSSA_LOOPEXIT]]:
-; CHECK-NEXT:    [[I_UNR_PH:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_LATCH]] ]
-; CHECK-NEXT:    br label %[[FOR_CLEANUP_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_CLEANUP_UNR_LCSSA:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       [[FOR_CLEANUP_UNR_LCSSA]]:
-; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTEST]] ], [ [[I_UNR_PH]], %[[FOR_CLEANUP_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[I_UNR1:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_LATCH]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_CLEANUP]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER]], label %[[FOR_CLEANUP]]
 ; CHECK:       [[FOR_OUTER_EPIL_PREHEADER]]:
+; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTEST]] ], [ [[I_UNR1]], %[[FOR_CLEANUP_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label %[[FOR_OUTER_EPIL:.*]]
 ; CHECK:       [[FOR_OUTER_EPIL]]:
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_UNR]]
@@ -1116,7 +1113,7 @@ define void @test9(i32 %I, i32 %E, ptr nocapture %A, ptr nocapture readonly %B)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[I]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[I]], 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER_PREHEADER_NEW:.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_OUTER_PREHEADER_NEW:.*]]
 ; CHECK:       [[FOR_OUTER_PREHEADER_NEW]]:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]]
 ; CHECK-NEXT:    br label %[[FOR_OUTER:.*]]
@@ -1174,15 +1171,15 @@ define void @test9(i32 %I, i32 %E, ptr nocapture %A, ptr nocapture readonly %B)
 ; CHECK-NEXT:    [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_2]]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX6_3]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP12:![0-9]+]]
-; CHECK:       [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]]:
-; CHECK-NEXT:    [[I_UNR_PH:%.*]] = phi i32 [ [[ADD8_3]], %[[FOR_LATCH]] ]
-; CHECK-NEXT:    br label %[[FOR_END_LOOPEXIT_UNR_LCSSA]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       [[FOR_END_LOOPEXIT_UNR_LCSSA]]:
-; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
+; CHECK-NEXT:    [[I_UNR1:%.*]] = phi i32 [ [[ADD8_3]], %[[FOR_LATCH]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_END_LOOPEXIT:.*]]
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER]], label %[[FOR_END_LOOPEXIT:.*]]
 ; CHECK:       [[FOR_OUTER_EPIL_PREHEADER]]:
+; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER]] ], [ [[I_UNR1]], %[[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label %[[FOR_OUTER_EPIL:.*]]
 ; CHECK:       [[FOR_OUTER_EPIL]]:
 ; CHECK-NEXT:    br label %[[FOR_INNER_EPIL:.*]]
@@ -1293,11 +1290,11 @@ for.end:
 define signext i16 @test10(i32 %k) #0 {
 ; CHECK-LABEL: define signext i16 @test10(
 ; CHECK-SAME: i32 [[K:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr @c, align 1
 ; CHECK-NEXT:    [[TOBOOL9:%.*]] = icmp eq i8 [[TMP0]], 0
 ; CHECK-NEXT:    [[TOBOOL13:%.*]] = icmp ne i32 [[K]], 0
-; CHECK-NEXT:    br i1 false, label %[[FOR_END26_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
+; CHECK-NEXT:    br i1 false, label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
 ; CHECK:       [[ENTRY_NEW]]:
 ; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK:       [[FOR_BODY]]:
@@ -1325,18 +1322,14 @@ define signext i16 @test10(i32 %k) #0 {
 ; CHECK-NEXT:    br i1 [[TOBOOL9]], label %[[FOR_BODY2_SPLIT_1:.*]], label %[[FOR_BODY2_SPLIT2_1:.*]]
 ; CHECK:       [[FOR_INC24]]:
 ; CHECK-NEXT:    [[STOREMERGE_4_LCSSA_3:%.*]] = phi i64 [ [[STOREMERGE_4_3:%.*]], %[[FOR_INC21_3]] ]
-; CHECK-NEXT:    br i1 false, label %[[FOR_BODY]], label %[[FOR_END26_UNR_LCSSA_LOOPEXIT:.*]], !llvm.loop [[LOOP13:![0-9]+]]
-; CHECK:       [[FOR_END26_UNR_LCSSA_LOOPEXIT]]:
+; CHECK-NEXT:    br i1 false, label %[[FOR_BODY]], label %[[FOR_END26_UNR_LCSSA:.*]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK:       [[FOR_END26_UNR_LCSSA]]:
 ; CHECK-NEXT:    [[DEC_LCSSA_LCSSA_PH_PH:%.*]] = phi i64 [ 0, %[[FOR_INC24]] ]
 ; CHECK-NEXT:    [[STOREMERGE_4_LCSSA_LCSSA_PH_PH:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_3]], %[[FOR_INC24]] ]
 ; CHECK-NEXT:    [[STOREMERGE_5_LCSSA_LCSSA_PH_PH:%.*]] = phi i32 [ 0, %[[FOR_INC24]] ]
-; CHECK-NEXT:    br label %[[FOR_END26_UNR_LCSSA]]
-; CHECK:       [[FOR_END26_UNR_LCSSA]]:
-; CHECK-NEXT:    [[DEC_LCSSA_LCSSA_PH:%.*]] = phi i64 [ poison, %[[ENTRY]] ], [ [[DEC_LCSSA_LCSSA_PH_PH]], %[[FOR_END26_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[STOREMERGE_4_LCSSA_LCSSA_PH:%.*]] = phi i64 [ poison, %[[ENTRY]] ], [ [[STOREMERGE_4_LCSSA_LCSSA_PH_PH]], %[[FOR_END26_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    [[STOREMERGE_5_LCSSA_LCSSA_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[STOREMERGE_5_LCSSA_LCSSA_PH_PH]], %[[FOR_END26_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT:    br i1 true, label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[FOR_END26:.*]]
+; CHECK-NEXT:    br i1 true, label %[[FOR_BODY_EPIL_PREHEADER]], label %[[FOR_END26:.*]]
 ; CHECK:       [[FOR_BODY_EPIL_PREHEADER]]:
+; CHECK-NEXT:    call void @llvm.assume(i1 true)
 ; CHECK-NEXT:    br label %[[FOR_BODY_EPIL:.*]]
 ; CHECK:       [[FOR_BODY_EPIL]]:
 ; CHECK-NEXT:    br label %[[FOR_BODY2_EPIL:.*]]
@@ -1360,9 +1353,9 @@ define signext i16 @test10(i32 %k) #0 {
 ; CHECK-NEXT:    [[STOREMERGE_4_LCSSA_EPIL:%.*]] = phi i64 [ [[STOREMERGE_4_EPIL]], %[[FOR_INC21_EPIL]] ]
 ; CHECK-NEXT:    br label %[[FOR_END26]]
 ; CHECK:       [[FOR_END26]]:
-; CHECK-NEXT:    [[DEC_LCSSA_LCSSA:%.*]] = phi i64 [ [[DEC_LCSSA_LCSSA_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ 0, %[[FOR_INC24_EPIL]] ]
-; CHECK-NEXT:    [[STOREMERGE_4_LCSSA_LCSSA:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_LCSSA_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ [[STOREMERGE_4_LCSSA_EPIL]], %[[FOR_INC24_EPIL]] ]
-; CHECK-NEXT:    [[STOREMERGE_5_LCSSA_LCSSA:%.*]] = phi i32 [ [[STOREMERGE_5_LCSSA_LCSSA_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ 0, %[[FOR_INC24_EPIL]] ]
+; CHECK-NEXT:    [[DEC_LCSSA_LCSSA:%.*]] = phi i64 [ [[DEC_LCSSA_LCSSA_PH_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ 0, %[[FOR_INC24_EPIL]] ]
+; CHECK-NEXT:    [[STOREMERGE_4_LCSSA_LCSSA:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_LCSSA_PH_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ [[STOREMERGE_4_LCSSA_EPIL]], %[[FOR_INC24_EPIL]] ]
+; CHECK-NEXT:    [[STOREMERGE_5_LCSSA_LCSSA:%.*]] = phi i32 [ [[STOREMERGE_5_LCSSA_LCSSA_PH_PH]], %[[FOR_END26_UNR_LCSSA]] ], [ 0, %[[FOR_INC24_EPIL]] ]
 ; CHECK-NEXT:    store i64 [[DEC_LCSSA_LCSSA]], ptr @g, align 8
 ; CHECK-NEXT:    ret i16 0
 ; CHECK:       [[FOR_BODY2_SPLIT2_1]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll
index 649e34e..7548bf6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll
@@ -45,9 +45,6 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly
 ; CHECK-UF4-NEXT:    [[TMP3:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 8)
 ; CHECK-UF4-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 4)
 ; CHECK-UF4-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0)
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 4, i64 [[N]])
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_ENTRY2:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 8, i64 [[N]])
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 12, i64 [[N]])
 ; CHECK-UF4-NEXT:    br label [[VECTOR_BODY1:%.*]]
 ; CHECK-UF4:       vector.body:
 ; CHECK-UF4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY1]] ]
@@ -67,17 +64,11 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly
 ; CHECK-UF4-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP18]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK5]])
 ; CHECK-UF4-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP19]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK6]])
 ; CHECK-UF4-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
-; CHECK-UF4-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 4
-; CHECK-UF4-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 8
-; CHECK-UF4-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 12
 ; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 [[INDEX]], i64 [[TMP6]])
 ; CHECK-UF4-NEXT:    [[TMP12]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 12)
 ; CHECK-UF4-NEXT:    [[TMP11]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 8)
 ; CHECK-UF4-NEXT:    [[TMP10]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 4)
 ; CHECK-UF4-NEXT:    [[TMP9]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0)
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_NEXT7:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP13]], i64 [[TMP6]])
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_NEXT8:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP14]], i64 [[TMP6]])
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_NEXT9:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP15]], i64 [[TMP6]])
 ; CHECK-UF4-NEXT:    [[TMP21:%.*]] = extractelement <4 x i1> [[TMP9]], i32 0
 ; CHECK-UF4-NEXT:    [[TMP20:%.*]] = xor i1 [[TMP21]], true
 ; CHECK-UF4-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/neon-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/neon-inloop-reductions.ll
new file mode 100644
index 0000000..22696d0
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/neon-inloop-reductions.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
+; RUN: opt -p loop-vectorize -prefer-inloop-reductions -mcpu=apple-m1 -force-vector-interleave=1 -S %s | FileCheck %s
+
+target triple = "arm64-apple-macosx"
+
+define i32 @mul_used_outside_vpexpression(ptr %src.0, ptr %src.1) { ; Test input: one zext'd i8 product feeds both an add and an or accumulator.
+; CHECK-LABEL: define i32 @mul_used_outside_vpexpression(
+; CHECK-SAME: ptr [[SRC_0:%.*]], ptr [[SRC_1:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ITER_CHECK:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK:       [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; CHECK-NEXT:    br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[SRC_1]], i64 1
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC_0]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP1]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = mul <16 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP4]])
+; CHECK-NEXT:    [[TMP6]] = add i32 [[VEC_PHI]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP4]])
+; CHECK-NEXT:    [[TMP8]] = or i32 [[VEC_PHI1]], [[TMP7]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
+; CHECK-NEXT:    br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 false, label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
+; CHECK:       [[VEC_EPILOG_ITER_CHECK]]:
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[SRC_0]], i64 96
+; CHECK-NEXT:    br i1 false, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
+; CHECK:       [[VEC_EPILOG_PH]]:
+; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX2:%.*]] = phi i32 [ [[TMP8]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[SRC_0]], i64 100
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[SRC_1]], i64 1
+; CHECK-NEXT:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
+; CHECK:       [[VEC_EPILOG_VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI4:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[VEC_EPILOG_PH]] ], [ [[TMP17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI5:%.*]] = phi i32 [ [[BC_MERGE_RDX2]], %[[VEC_EPILOG_PH]] ], [ [[TMP19:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[SRC_0]], i64 [[INDEX3]]
+; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[NEXT_GEP6]], align 1
+; CHECK-NEXT:    [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 1
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[TMP12]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT8]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = zext <4 x i8> [[WIDE_LOAD7]] to <4 x i32>
+; CHECK-NEXT:    [[TMP14:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT9]] to <4 x i32>
+; CHECK-NEXT:    [[TMP15:%.*]] = mul <4 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP15]])
+; CHECK-NEXT:    [[TMP17]] = add i32 [[VEC_PHI4]], [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP15]])
+; CHECK-NEXT:    [[TMP19]] = or i32 [[VEC_PHI5]], [[TMP18]]
+; CHECK-NEXT:    [[INDEX_NEXT10]] = add nuw i64 [[INDEX3]], 4
+; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT10]], 100
+; CHECK-NEXT:    br i1 [[TMP20]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       [[VEC_EPILOG_MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br i1 false, label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
+; CHECK:       [[VEC_EPILOG_SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 100, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL11:%.*]] = phi ptr [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[SRC_0]], %[[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX12:%.*]] = phi i32 [ [[TMP17]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP6]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP19]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL11]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[GEP_0:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[RED_0:%.*]] = phi i32 [ [[BC_MERGE_RDX12]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_0_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[RED_1:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_1_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[GEP_0]] = getelementptr i8, ptr [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[L_0:%.*]] = load i8, ptr [[PTR_IV]], align 1
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr i8, ptr [[SRC_1]], i64 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i8, ptr [[GEP_1]], align 1
+; CHECK-NEXT:    [[L_0_EXT:%.*]] = zext i8 [[L_0]] to i32
+; CHECK-NEXT:    [[L_1_EXT:%.*]] = zext i8 [[L_1]] to i32
+; CHECK-NEXT:    [[MUL_EXT_LL:%.*]] = mul i32 [[L_0_EXT]], [[L_1_EXT]]
+; CHECK-NEXT:    [[RED_1_NEXT]] = or i32 [[MUL_EXT_LL]], [[RED_1]]
+; CHECK-NEXT:    [[RED_0_NEXT]] = add i32 [[MUL_EXT_LL]], [[RED_0]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV]], 101
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RED_1_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_1_NEXT]], %[[LOOP]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ [[TMP19]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[RED_0_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_0_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ [[TMP17]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[RED_1_NEXT_LCSSA]], [[RED_0_NEXT_LCSSA]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]           ; i32 counter, starts at 0
+  %ptr.iv = phi ptr [ %src.0, %entry ], [ %gep.0, %loop ]    ; byte pointer walking forward from %src.0
+  %red.0 = phi i32 [ 0, %entry ], [ %red.0.next, %loop ]     ; running sum of the products
+  %red.1 = phi i32 [ 0, %entry ], [ %red.1.next, %loop ]     ; running bitwise-or of the products
+  %gep.0 = getelementptr i8, ptr %ptr.iv, i64 1              ; pointer for the next iteration (+1 byte)
+  %l.0 = load i8, ptr %ptr.iv, align 1                       ; reads through the current, not the advanced, pointer
+  %gep.1 = getelementptr i8, ptr %src.1, i64 1               ; %src.1 + 1: the same address on every iteration
+  %l.1 = load i8, ptr %gep.1, align 1
+  %l.0.ext = zext i8 %l.0 to i32
+  %l.1.ext = zext i8 %l.1 to i32
+  %mul.ext.ll = mul i32 %l.0.ext, %l.1.ext                   ; single product consumed by both accumulators below
+  %red.1.next = or i32 %mul.ext.ll, %red.1
+  %red.0.next = add i32 %mul.ext.ll, %red.0
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv, 101                                 ; tests %iv, not %iv.next: body runs for %iv = 0..101 (102 times)
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = add i32 %red.1.next, %red.0.next                    ; result = final or-value + final add-value
+  ret i32 %res
+}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll
index 5ee4e9e..75acbea9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll
@@ -46,23 +46,11 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
 ; CHECK-UF4-NEXT:    [[TMP7:%.*]] = sub i64 [[N]], [[TMP3]]
 ; CHECK-UF4-NEXT:    [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP3]]
 ; CHECK-UF4-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
-; CHECK-UF4-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT:    [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 4
-; CHECK-UF4-NEXT:    [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP11]]
-; CHECK-UF4-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT:    [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 5
-; CHECK-UF4-NEXT:    [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP13]]
-; CHECK-UF4-NEXT:    [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT:    [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 48
-; CHECK-UF4-NEXT:    [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP15]]
 ; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 0, i64 [[N]])
 ; CHECK-UF4-NEXT:    [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 48)
 ; CHECK-UF4-NEXT:    [[TMP18:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 32)
 ; CHECK-UF4-NEXT:    [[TMP17:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 16)
 ; CHECK-UF4-NEXT:    [[TMP16:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0)
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]])
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]])
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]])
 ; CHECK-UF4-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-UF4:       vector.body:
 ; CHECK-UF4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -103,23 +91,11 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
 ; CHECK-UF4-NEXT:    call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP27]], ptr [[TMP42]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK7]])
 ; CHECK-UF4-NEXT:    call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP28]], ptr [[TMP45]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK8]])
 ; CHECK-UF4-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP62]]
-; CHECK-UF4-NEXT:    [[TMP46:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT:    [[TMP47:%.*]] = shl nuw i64 [[TMP46]], 4
-; CHECK-UF4-NEXT:    [[TMP48:%.*]] = add i64 [[INDEX]], [[TMP47]]
-; CHECK-UF4-NEXT:    [[TMP49:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT:    [[TMP50:%.*]] = shl nuw i64 [[TMP49]], 5
-; CHECK-UF4-NEXT:    [[TMP51:%.*]] = add i64 [[INDEX]], [[TMP50]]
-; CHECK-UF4-NEXT:    [[TMP52:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT:    [[TMP53:%.*]] = mul nuw i64 [[TMP52]], 48
-; CHECK-UF4-NEXT:    [[TMP54:%.*]] = add i64 [[INDEX]], [[TMP53]]
 ; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 [[INDEX]], i64 [[TMP9]])
 ; CHECK-UF4-NEXT:    [[TMP58]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 48)
 ; CHECK-UF4-NEXT:    [[TMP57]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 32)
 ; CHECK-UF4-NEXT:    [[TMP56]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 16)
 ; CHECK-UF4-NEXT:    [[TMP55]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0)
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_NEXT12:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP48]], i64 [[TMP9]])
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_NEXT13:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP51]], i64 [[TMP9]])
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_NEXT14:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP54]], i64 [[TMP9]])
 ; CHECK-UF4-NEXT:    [[TMP59:%.*]] = extractelement <vscale x 16 x i1> [[TMP55]], i32 0
 ; CHECK-UF4-NEXT:    [[TMP60:%.*]] = xor i1 [[TMP59]], true
 ; CHECK-UF4-NEXT:    br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -191,23 +167,11 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
 ; CHECK-UF4-NEXT:    [[TMP31:%.*]] = sub i64 [[N]], [[TMP26]]
 ; CHECK-UF4-NEXT:    [[TMP56:%.*]] = icmp ugt i64 [[N]], [[TMP26]]
 ; CHECK-UF4-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = select i1 [[TMP56]], i64 [[TMP31]], i64 0
-; CHECK-UF4-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
-; CHECK-UF4-NEXT:    [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP6]]
-; CHECK-UF4-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT:    [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
-; CHECK-UF4-NEXT:    [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP8]]
-; CHECK-UF4-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT:    [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 6
-; CHECK-UF4-NEXT:    [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP10]]
 ; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]])
 ; CHECK-UF4-NEXT:    [[TMP14:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 6)
 ; CHECK-UF4-NEXT:    [[TMP13:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 4)
 ; CHECK-UF4-NEXT:    [[TMP12:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 2)
 ; CHECK-UF4-NEXT:    [[TMP11:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0)
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]])
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]])
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]])
 ; CHECK-UF4-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-UF4:       vector.body:
 ; CHECK-UF4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -248,23 +212,11 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
 ; CHECK-UF4-NEXT:    call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP18]], ptr [[TMP37]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK7]])
 ; CHECK-UF4-NEXT:    call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP19]], ptr [[TMP40]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK8]])
 ; CHECK-UF4-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP3]]
-; CHECK-UF4-NEXT:    [[TMP41:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT:    [[TMP42:%.*]] = shl nuw i64 [[TMP41]], 1
-; CHECK-UF4-NEXT:    [[TMP43:%.*]] = add i64 [[INDEX]], [[TMP42]]
-; CHECK-UF4-NEXT:    [[TMP44:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT:    [[TMP45:%.*]] = shl nuw i64 [[TMP44]], 2
-; CHECK-UF4-NEXT:    [[TMP46:%.*]] = add i64 [[INDEX]], [[TMP45]]
-; CHECK-UF4-NEXT:    [[TMP47:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-UF4-NEXT:    [[TMP48:%.*]] = mul nuw i64 [[TMP47]], 6
-; CHECK-UF4-NEXT:    [[TMP49:%.*]] = add i64 [[INDEX]], [[TMP48]]
 ; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[WIDE_TRIP_COUNT]])
 ; CHECK-UF4-NEXT:    [[TMP53]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 6)
 ; CHECK-UF4-NEXT:    [[TMP52]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 4)
 ; CHECK-UF4-NEXT:    [[TMP51]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 2)
 ; CHECK-UF4-NEXT:    [[TMP50]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0)
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_NEXT12:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP43]], i64 [[WIDE_TRIP_COUNT]])
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_NEXT13:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP46]], i64 [[WIDE_TRIP_COUNT]])
-; CHECK-UF4-NEXT:    [[ACTIVE_LANE_MASK_NEXT14:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP49]], i64 [[WIDE_TRIP_COUNT]])
 ; CHECK-UF4-NEXT:    [[TMP54:%.*]] = extractelement <vscale x 2 x i1> [[TMP50]], i32 0
 ; CHECK-UF4-NEXT:    [[TMP55:%.*]] = xor i1 [[TMP54]], true
 ; CHECK-UF4-NEXT:    br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/ARM/replicating-load-store-costs.ll
new file mode 100644
index 0000000..fd83a01
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/ARM/replicating-load-store-costs.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -p loop-vectorize -S %s | FileCheck %s
+
+target triple = "armv7-unknown-linux-gnueabihf"
+
+define void @replicating_load_used_by_other_load(i32 %arg, ptr %a, i32 %b) {
+; CHECK-LABEL: define void @replicating_load_used_by_other_load(
+; CHECK-SAME: i32 [[ARG:%.*]], ptr [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[ARG]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[IV]], 1
+; CHECK-NEXT:    [[AND_1:%.*]] = and i32 [[IV]], 1
+; CHECK-NEXT:    [[SHL_1:%.*]] = shl i32 [[IV]], 2
+; CHECK-NEXT:    [[SHL_2:%.*]] = shl i32 [[IV]], 1
+; CHECK-NEXT:    [[AND_2:%.*]] = and i32 [[SHL_2]], 2
+; CHECK-NEXT:    [[OR_1:%.*]] = or i32 [[AND_2]], [[AND_1]]
+; CHECK-NEXT:    [[OR_2:%.*]] = or i32 [[OR_1]], [[SHL_1]]
+; CHECK-NEXT:    [[XOR_1:%.*]] = xor i32 [[B]], [[OR_2]]
+; CHECK-NEXT:    [[XOR_2:%.*]] = xor i32 [[XOR_1]], [[ARG]]
+; CHECK-NEXT:    [[SHR_2:%.*]] = lshr i32 [[SHL_1]], 1
+; CHECK-NEXT:    [[XOR_3:%.*]] = xor i32 [[SHR]], [[ARG]]
+; CHECK-NEXT:    [[AND_3:%.*]] = and i32 [[XOR_3]], 1
+; CHECK-NEXT:    [[AND_4:%.*]] = and i32 [[IV]], 2147483646
+; CHECK-NEXT:    [[OR_3:%.*]] = or i32 [[AND_3]], [[AND_4]]
+; CHECK-NEXT:    [[AND_5:%.*]] = and i32 [[IV]], 254
+; CHECK-NEXT:    [[SHL_3:%.*]] = shl i32 [[OR_3]], 1
+; CHECK-NEXT:    [[XOR_4:%.*]] = xor i32 [[SHL_3]], 2
+; CHECK-NEXT:    [[OR_4:%.*]] = or i32 [[AND_5]], [[XOR_4]]
+; CHECK-NEXT:    [[XOR_5:%.*]] = xor i32 [[SHR_2]], [[OR_4]]
+; CHECK-NEXT:    [[XOR_6:%.*]] = xor i32 [[XOR_5]], [[XOR_2]]
+; CHECK-NEXT:    [[AND_6:%.*]] = and i32 [[XOR_6]], 255
+; CHECK-NEXT:    [[XOR_7:%.*]] = xor i32 [[AND_6]], 1
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[XOR_7]]
+; CHECK-NEXT:    [[LD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[LD]] to i32
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr i32, ptr null, i32 [[ZEXT]]
+; CHECK-NEXT:    store i32 0, ptr [[GEP_2]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop: ; the assertions above expect the output to keep this scalar loop (no vector blocks appear)
+  %iv = phi i32 [ %iv.next, %loop ], [ %arg, %entry ] ; loop counter: starts at %arg, stepped by 1 below
+  %shr = lshr i32 %iv, 1
+  %and.1 = and i32 %iv, 1
+  %shl.1 = shl i32 %iv, 2
+  %shl.2 = shl i32 %iv, 1
+  %and.2 = and i32 %shl.2, 2
+  %or.1 = or i32 %and.2, %and.1
+  %or.2 = or i32 %or.1, %shl.1
+  %xor.1 = xor i32 %b, %or.2
+  %xor.2 = xor i32 %xor.1, %arg
+  %shr.2 = lshr i32 %shl.1, 1
+  %xor.3 = xor i32 %shr, %arg
+  %and.3 = and i32 %xor.3, 1
+  %and.4 = and i32 %iv, 2147483646
+  %or.3 = or i32 %and.3, %and.4
+  %and.5 = and i32 %iv, 254
+  %shl.3 = shl i32 %or.3, 1
+  %xor.4 = xor i32 %shl.3, 2
+  %or.4 = or i32 %and.5, %xor.4
+  %xor.5 = xor i32 %shr.2, %or.4
+  %xor.6 = xor i32 %xor.5, %xor.2 ; combines the bit-mixed values derived from %iv, %arg and %b
+  %and.6 = and i32 %xor.6, 255 ; keep only the low 8 bits of the mixed value
+  %xor.7 = xor i32 %and.6, 1 ; flip bit 0; the result is still within 0..255
+  %gep = getelementptr i8, ptr %a, i32 %xor.7 ; byte address in %a selected by the bit-mixed index
+  %ld = load i8, ptr %gep, align 1
+  %zext = zext i8 %ld to i32
+  %gep.2 = getelementptr i32, ptr null, i32 %zext ; the loaded byte becomes the index of the store address (base pointer is null)
+  store i32 0, ptr %gep.2, align 4
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp eq i32 %iv.next, 100 ; leave the loop once the incremented counter equals 100
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index 3b0ad73..39217e5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -23,7 +23,7 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC:       [[ITER_CHECK]]:
 ; AUTO_VEC-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
 ; AUTO_VEC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
-; AUTO_VEC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; AUTO_VEC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
 ; AUTO_VEC:       [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
 ; AUTO_VEC-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 32
 ; AUTO_VEC-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -60,7 +60,7 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC-NEXT:    [[TMP11:%.*]] = fmul fast float 5.000000e-01, [[DOTCAST12]]
 ; AUTO_VEC-NEXT:    [[IND_END1:%.*]] = fadd fast float 1.000000e+00, [[TMP11]]
 ; AUTO_VEC-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
-; AUTO_VEC-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[FOR_BODY]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
+; AUTO_VEC-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
 ; AUTO_VEC:       [[VEC_EPILOG_PH]]:
 ; AUTO_VEC-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; AUTO_VEC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi float [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -84,14 +84,14 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC-NEXT:    br i1 [[TMP9]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; AUTO_VEC:       [[VEC_EPILOG_MIDDLE_BLOCK]]:
 ; AUTO_VEC-NEXT:    [[CMP_N9:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
-; AUTO_VEC-NEXT:    br i1 [[CMP_N9]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]]
-; AUTO_VEC:       [[FOR_BODY]]:
+; AUTO_VEC-NEXT:    br i1 [[CMP_N9]], label %[[FOR_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
+; AUTO_VEC:       [[VEC_EPILOG_SCALAR_PH]]:
 ; AUTO_VEC-NEXT:    [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
 ; AUTO_VEC-NEXT:    [[BC_RESUME_VAL11:%.*]] = phi float [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ]
 ; AUTO_VEC-NEXT:    br label %[[LOOP:.*]]
 ; AUTO_VEC:       [[LOOP]]:
-; AUTO_VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL10]], %[[FOR_BODY]] ]
-; AUTO_VEC-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL11]], %[[FOR_BODY]] ]
+; AUTO_VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL10]], %[[VEC_EPILOG_SCALAR_PH]] ]
+; AUTO_VEC-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL11]], %[[VEC_EPILOG_SCALAR_PH]] ]
 ; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; AUTO_VEC-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
@@ -144,19 +144,19 @@ define void @fp_iv_loop2(ptr noalias nocapture %A, i32 %N) {
 ; AUTO_VEC-SAME: ptr noalias captures(none) [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
 ; AUTO_VEC-NEXT:  [[ENTRY:.*:]]
 ; AUTO_VEC-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N]], 0
-; AUTO_VEC-NEXT:    br i1 [[CMP4]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
-; AUTO_VEC:       [[FOR_BODY_PREHEADER]]:
-; AUTO_VEC-NEXT:    br label %[[FOR_BODY:.*]]
-; AUTO_VEC:       [[FOR_BODY]]:
-; AUTO_VEC-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
-; AUTO_VEC-NEXT:    [[X_06_EPIL:%.*]] = phi float [ [[CONV1_EPIL:%.*]], %[[FOR_BODY]] ], [ 1.000000e+00, %[[FOR_BODY_PREHEADER]] ]
+; AUTO_VEC-NEXT:    br i1 [[CMP4]], label %[[LOOP_PREHEADER:.*]], label %[[FOR_END:.*]]
+; AUTO_VEC:       [[LOOP_PREHEADER]]:
+; AUTO_VEC-NEXT:    br label %[[LOOP:.*]]
+; AUTO_VEC:       [[LOOP]]:
+; AUTO_VEC-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; AUTO_VEC-NEXT:    [[X_06_EPIL:%.*]] = phi float [ [[CONV1_EPIL:%.*]], %[[LOOP]] ], [ 1.000000e+00, %[[LOOP_PREHEADER]] ]
 ; AUTO_VEC-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
 ; AUTO_VEC-NEXT:    store float [[X_06_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_EPIL]] = fadd float [[X_06_EPIL]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1
 ; AUTO_VEC-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT_EPIL]] to i32
 ; AUTO_VEC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
-; AUTO_VEC-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
+; AUTO_VEC-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[LOOP]]
 ; AUTO_VEC:       [[FOR_END_LOOPEXIT]]:
 ; AUTO_VEC-NEXT:    br label %[[FOR_END]]
 ; AUTO_VEC:       [[FOR_END]]:
@@ -193,7 +193,7 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
 ; AUTO_VEC-NEXT:  [[ENTRY:.*]]:
 ; AUTO_VEC-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
 ; AUTO_VEC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 16
-; AUTO_VEC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY:.*]], label %[[VECTOR_PH:.*]]
+; AUTO_VEC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; AUTO_VEC:       [[VECTOR_PH]]:
 ; AUTO_VEC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 16
 ; AUTO_VEC-NEXT:    [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
@@ -222,14 +222,14 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
 ; AUTO_VEC:       [[MIDDLE_BLOCK]]:
 ; AUTO_VEC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
 ; AUTO_VEC-NEXT:    [[TMP7:%.*]] = fsub fast double [[TMP6]], 3.000000e+00
-; AUTO_VEC-NEXT:    br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
-; AUTO_VEC:       [[FOR_BODY]]:
+; AUTO_VEC-NEXT:    br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; AUTO_VEC:       [[SCALAR_PH]]:
 ; AUTO_VEC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; AUTO_VEC-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi double [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
 ; AUTO_VEC-NEXT:    br label %[[LOOP:.*]]
 ; AUTO_VEC:       [[LOOP]]:
-; AUTO_VEC-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[FOR_BODY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
-; AUTO_VEC-NEXT:    [[J:%.*]] = phi double [ [[BC_RESUME_VAL1]], %[[FOR_BODY]] ], [ [[J_NEXT:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT:    [[J:%.*]] = phi double [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[J_NEXT:%.*]], %[[LOOP]] ]
 ; AUTO_VEC-NEXT:    [[T0:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
 ; AUTO_VEC-NEXT:    store double [[J]], ptr [[T0]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT]] = add i64 [[I]], 1
@@ -261,19 +261,19 @@ for.end:
 define double @external_use_without_fast_math(ptr %a, i64 %n) {
 ; AUTO_VEC-LABEL: define double @external_use_without_fast_math(
 ; AUTO_VEC-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; AUTO_VEC-NEXT:  [[ENTRY_NEW:.*]]:
-; AUTO_VEC-NEXT:    br label %[[FOR_BODY:.*]]
-; AUTO_VEC:       [[FOR_BODY]]:
-; AUTO_VEC-NEXT:    [[I:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[I_NEXT_7:%.*]], %[[FOR_BODY]] ]
-; AUTO_VEC-NEXT:    [[J:%.*]] = phi double [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[J_NEXT_7:%.*]], %[[FOR_BODY]] ]
+; AUTO_VEC-NEXT:  [[ENTRY:.*]]:
+; AUTO_VEC-NEXT:    br label %[[LOOP:.*]]
+; AUTO_VEC:       [[LOOP]]:
+; AUTO_VEC-NEXT:    [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT_7:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT:    [[J:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[J_NEXT_7:%.*]], %[[LOOP]] ]
 ; AUTO_VEC-NEXT:    [[TMP7:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
 ; AUTO_VEC-NEXT:    store double [[J]], ptr [[TMP7]], align 8
 ; AUTO_VEC-NEXT:    [[I_NEXT_7]] = add i64 [[I]], 1
 ; AUTO_VEC-NEXT:    [[J_NEXT_7]] = fadd double [[J]], 3.000000e+00
 ; AUTO_VEC-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT_7]], [[N]]
-; AUTO_VEC-NEXT:    br i1 [[COND]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
+; AUTO_VEC-NEXT:    br i1 [[COND]], label %[[LOOP]], label %[[FOR_END:.*]]
 ; AUTO_VEC:       [[FOR_END]]:
-; AUTO_VEC-NEXT:    [[J_LCSSA:%.*]] = phi double [ [[J]], %[[FOR_BODY]] ]
+; AUTO_VEC-NEXT:    [[J_LCSSA:%.*]] = phi double [ [[J]], %[[LOOP]] ]
 ; AUTO_VEC-NEXT:    ret double [[J_LCSSA]]
 ;
 entry:
@@ -308,7 +308,7 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
 ; AUTO_VEC-NEXT:  [[ITER_CHECK:.*]]:
 ; AUTO_VEC-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
 ; AUTO_VEC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
-; AUTO_VEC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; AUTO_VEC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
 ; AUTO_VEC:       [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
 ; AUTO_VEC-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 32
 ; AUTO_VEC-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -353,7 +353,7 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
 ; AUTO_VEC-NEXT:    [[TMP12:%.*]] = fmul reassoc float 4.200000e+01, [[DOTCAST16]]
 ; AUTO_VEC-NEXT:    [[IND_END1:%.*]] = fadd reassoc float 1.000000e+00, [[TMP12]]
 ; AUTO_VEC-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
-; AUTO_VEC-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[FOR_BODY]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]]
+; AUTO_VEC-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]]
 ; AUTO_VEC:       [[VEC_EPILOG_PH]]:
 ; AUTO_VEC-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; AUTO_VEC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi float [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -379,14 +379,14 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
 ; AUTO_VEC-NEXT:    br i1 [[TMP15]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; AUTO_VEC:       [[VEC_EPILOG_MIDDLE_BLOCK]]:
 ; AUTO_VEC-NEXT:    [[CMP_N18:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC6]]
-; AUTO_VEC-NEXT:    br i1 [[CMP_N18]], label %[[EXIT]], label %[[FOR_BODY]]
-; AUTO_VEC:       [[FOR_BODY]]:
+; AUTO_VEC-NEXT:    br i1 [[CMP_N18]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
+; AUTO_VEC:       [[VEC_EPILOG_SCALAR_PH]]:
 ; AUTO_VEC-NEXT:    [[BC_RESUME_VAL14:%.*]] = phi i64 [ [[N_VEC6]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
 ; AUTO_VEC-NEXT:    [[BC_RESUME_VAL15:%.*]] = phi float [ [[TMP18]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ]
 ; AUTO_VEC-NEXT:    br label %[[LOOP:.*]]
 ; AUTO_VEC:       [[LOOP]]:
-; AUTO_VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL14]], %[[FOR_BODY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ]
-; AUTO_VEC-NEXT:    [[X_012:%.*]] = phi float [ [[BC_RESUME_VAL15]], %[[FOR_BODY]] ], [ [[ADD3:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ]
+; AUTO_VEC-NEXT:    [[X_012:%.*]] = phi float [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD3:%.*]], %[[LOOP]] ]
 ; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[INDVARS_IV]]
 ; AUTO_VEC-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; AUTO_VEC-NEXT:    [[ADD:%.*]] = fadd reassoc float [[X_012]], [[TMP16]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
index 8784873..f5329cf 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
@@ -454,6 +454,132 @@ exit:
   ret void
 }
 
+declare i1 @cond()
+
+define double @test_load_used_by_other_load_scev(ptr %ptr.a, ptr %ptr.b, ptr %ptr.c) {
+; I64-LABEL: define double @test_load_used_by_other_load_scev(
+; I64-SAME: ptr [[PTR_A:%.*]], ptr [[PTR_B:%.*]], ptr [[PTR_C:%.*]]) {
+; I64-NEXT:  [[ENTRY:.*]]:
+; I64-NEXT:    br label %[[OUTER_LOOP:.*]]
+; I64:       [[OUTER_LOOP_LOOPEXIT:.*]]:
+; I64-NEXT:    br label %[[OUTER_LOOP]]
+; I64:       [[OUTER_LOOP]]:
+; I64-NEXT:    [[ACCUM:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP29:%.*]], %[[OUTER_LOOP_LOOPEXIT]] ]
+; I64-NEXT:    [[COND:%.*]] = call i1 @cond()
+; I64-NEXT:    br i1 [[COND]], label %[[INNER_LOOP_PREHEADER:.*]], label %[[EXIT:.*]]
+; I64:       [[INNER_LOOP_PREHEADER]]:
+; I64-NEXT:    br label %[[VECTOR_PH:.*]]
+; I64:       [[VECTOR_PH]]:
+; I64-NEXT:    br label %[[VECTOR_BODY:.*]]
+; I64:       [[VECTOR_BODY]]:
+; I64-NEXT:    [[TMP0:%.*]] = add i64 0, 1
+; I64-NEXT:    [[TMP1:%.*]] = add i64 1, 1
+; I64-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[TMP0]]
+; I64-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[TMP1]]
+; I64-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[TMP0]]
+; I64-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[TMP1]]
+; I64-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
+; I64-NEXT:    [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
+; I64-NEXT:    [[TMP8:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[TMP6]]
+; I64-NEXT:    [[TMP9:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[TMP7]]
+; I64-NEXT:    [[TMP10:%.*]] = load double, ptr [[PTR_A]], align 8
+; I64-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i64 0
+; I64-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; I64-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[BROADCAST_SPLAT]], zeroinitializer
+; I64-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP2]], i64 8
+; I64-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8
+; I64-NEXT:    [[TMP14:%.*]] = load double, ptr [[TMP12]], align 8
+; I64-NEXT:    [[TMP15:%.*]] = load double, ptr [[TMP13]], align 8
+; I64-NEXT:    [[TMP16:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i32 0
+; I64-NEXT:    [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[TMP15]], i32 1
+; I64-NEXT:    [[TMP18:%.*]] = fmul <2 x double> [[TMP11]], zeroinitializer
+; I64-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x double> poison, double [[ACCUM]], i64 0
+; I64-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT1]], <2 x double> poison, <2 x i32> zeroinitializer
+; I64-NEXT:    [[TMP19:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLAT2]], <2 x double> [[TMP18]], <2 x i32> <i32 1, i32 2>
+; I64-NEXT:    [[TMP20:%.*]] = fmul <2 x double> [[TMP17]], zeroinitializer
+; I64-NEXT:    [[TMP21:%.*]] = fadd <2 x double> [[TMP20]], zeroinitializer
+; I64-NEXT:    [[TMP22:%.*]] = fadd <2 x double> [[TMP21]], splat (double 1.000000e+00)
+; I64-NEXT:    [[TMP23:%.*]] = load double, ptr [[TMP8]], align 8
+; I64-NEXT:    [[TMP24:%.*]] = load double, ptr [[TMP9]], align 8
+; I64-NEXT:    [[TMP25:%.*]] = insertelement <2 x double> poison, double [[TMP23]], i32 0
+; I64-NEXT:    [[TMP26:%.*]] = insertelement <2 x double> [[TMP25]], double [[TMP24]], i32 1
+; I64-NEXT:    [[TMP27:%.*]] = fdiv <2 x double> [[TMP26]], [[TMP22]]
+; I64-NEXT:    [[TMP28:%.*]] = fsub <2 x double> [[TMP19]], [[TMP27]]
+; I64-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
+; I64:       [[MIDDLE_BLOCK]]:
+; I64-NEXT:    [[TMP29]] = extractelement <2 x double> [[TMP28]], i32 1
+; I64-NEXT:    br label %[[OUTER_LOOP_LOOPEXIT]]
+; I64:       [[EXIT]]:
+; I64-NEXT:    ret double [[ACCUM]]
+;
+; I32-LABEL: define double @test_load_used_by_other_load_scev(
+; I32-SAME: ptr [[PTR_A:%.*]], ptr [[PTR_B:%.*]], ptr [[PTR_C:%.*]]) {
+; I32-NEXT:  [[ENTRY:.*]]:
+; I32-NEXT:    br label %[[OUTER_LOOP:.*]]
+; I32:       [[OUTER_LOOP]]:
+; I32-NEXT:    [[ACCUM:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[RESULT:%.*]], %[[INNER_LOOP:.*]] ]
+; I32-NEXT:    [[COND:%.*]] = call i1 @cond()
+; I32-NEXT:    br i1 [[COND]], label %[[INNER_LOOP]], label %[[EXIT:.*]]
+; I32:       [[INNER_LOOP]]:
+; I32-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[OUTER_LOOP]] ], [ [[IV_NEXT:%.*]], %[[INNER_LOOP]] ]
+; I32-NEXT:    [[ACCUM_INNER:%.*]] = phi double [ [[ACCUM]], %[[OUTER_LOOP]] ], [ [[MUL1:%.*]], %[[INNER_LOOP]] ]
+; I32-NEXT:    [[IDX_PLUS1:%.*]] = add i64 [[IV]], 1
+; I32-NEXT:    [[GEP_C:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[IDX_PLUS1]]
+; I32-NEXT:    [[GEP_A_I64:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[IDX_PLUS1]]
+; I32-NEXT:    [[LOAD_IDX:%.*]] = load i64, ptr [[GEP_A_I64]], align 8
+; I32-NEXT:    [[GEP_B:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[LOAD_IDX]]
+; I32-NEXT:    [[LOAD_A:%.*]] = load double, ptr [[PTR_A]], align 8
+; I32-NEXT:    [[ADD1:%.*]] = fadd double [[LOAD_A]], 0.000000e+00
+; I32-NEXT:    [[GEP_C_OFFSET:%.*]] = getelementptr i8, ptr [[GEP_C]], i64 8
+; I32-NEXT:    [[LOAD_C:%.*]] = load double, ptr [[GEP_C_OFFSET]], align 8
+; I32-NEXT:    [[MUL1]] = fmul double [[ADD1]], 0.000000e+00
+; I32-NEXT:    [[MUL2:%.*]] = fmul double [[LOAD_C]], 0.000000e+00
+; I32-NEXT:    [[ADD2:%.*]] = fadd double [[MUL2]], 0.000000e+00
+; I32-NEXT:    [[ADD3:%.*]] = fadd double [[ADD2]], 1.000000e+00
+; I32-NEXT:    [[LOAD_B:%.*]] = load double, ptr [[GEP_B]], align 8
+; I32-NEXT:    [[DIV:%.*]] = fdiv double [[LOAD_B]], [[ADD3]]
+; I32-NEXT:    [[RESULT]] = fsub double [[ACCUM_INNER]], [[DIV]]
+; I32-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; I32-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1
+; I32-NEXT:    br i1 [[EXITCOND]], label %[[OUTER_LOOP]], label %[[INNER_LOOP]]
+; I32:       [[EXIT]]:
+; I32-NEXT:    ret double [[ACCUM]]
+;
+entry:
+  br label %outer.loop
+
+outer.loop:
+  %accum = phi double [ 0.0, %entry ], [ %result, %inner.loop ] ; 0.0 on first entry, afterwards the %result produced by the inner loop
+  %cond = call i1 @cond() ; declaration-only callee decides whether to run the inner loop again or return
+  br i1 %cond, label %inner.loop, label %exit
+
+inner.loop:
+  %iv = phi i64 [ 0, %outer.loop ], [ %iv.next, %inner.loop ]
+  %accum.inner = phi double [ %accum, %outer.loop ], [ %mul1, %inner.loop ] ; after the first iteration this holds the previous iteration's %mul1
+  %idx.plus1 = add i64 %iv, 1
+  %gep.c = getelementptr i8, ptr %ptr.c, i64 %idx.plus1
+  %gep.a.i64 = getelementptr i64, ptr %ptr.a, i64 %idx.plus1
+  %load.idx = load i64, ptr %gep.a.i64, align 8 ; loaded value is used as an index into %ptr.b on the next line
+  %gep.b = getelementptr double, ptr %ptr.b, i64 %load.idx ; address of %load.b depends on the load above
+  %load.a = load double, ptr %ptr.a, align 8 ; address does not depend on %iv
+  %add1 = fadd double %load.a, 0.000000e+00
+  %gep.c.offset = getelementptr i8, ptr %gep.c, i64 8 ; 8 bytes past %gep.c
+  %load.c = load double, ptr %gep.c.offset, align 8
+  %mul1 = fmul double %add1, 0.000000e+00
+  %mul2 = fmul double %load.c, 0.000000e+00
+  %add2 = fadd double %mul2, 0.000000e+00
+  %add3 = fadd double %add2, 1.000000e+00
+  %load.b = load double, ptr %gep.b, align 8
+  %div = fdiv double %load.b, %add3
+  %result = fsub double %accum.inner, %div ; value fed back to %accum in the outer loop
+  %iv.next = add i64 %iv, 1
+  %exitcond = icmp eq i64 %iv, 1 ; tests the pre-increment %iv, so the body runs for %iv = 0 and %iv = 1
+  br i1 %exitcond, label %outer.loop, label %inner.loop
+
+exit:
+  ret double %accum
+}
+
 attributes #0 = { "target-cpu"="znver2" }
 
 !0 = distinct !{!0, !1}
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
index 75420d4..bcea03a 100644
--- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
@@ -1182,31 +1182,13 @@ define void @deref_assumption_in_header_constant_trip_count_nofree_via_context(p
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD1]], zeroinitializer
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
-; CHECK-NEXT:    br i1 [[TMP13]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
-; CHECK:       [[PRED_LOAD_IF]]:
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
-; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE]]
-; CHECK:       [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT:    [[TMP7:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
-; CHECK-NEXT:    br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
-; CHECK:       [[PRED_LOAD_IF1]]:
-; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP11]], i32 1
-; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE2]]
-; CHECK:       [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = phi <2 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ]
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD1]], zeroinitializer
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[WIDE_LOAD1]], <2 x i32> [[WIDE_LOAD2]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
 ; CHECK-NEXT:    store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-scev-expansion.ll
new file mode 100644
index 0000000..b020e59
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-scev-expansion.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
+; RUN: opt -p loop-vectorize -force-vector-width=4 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=4 -S %s | FileCheck %s
+
+@end = external global [128 x i8]
+
+; Test case for https://github.com/llvm/llvm-project/issues/162128.
+define void @test_epilogue_step_scev_expansion(ptr %dst) {
+; CHECK-LABEL: define void @test_epilogue_step_scev_expansion(
+; CHECK-SAME: ptr [[DST:%.*]]) {
+; CHECK-NEXT:  [[ITER_CHECK:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK:       [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; CHECK-NEXT:    br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), [[N_MOD_VF]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
+; CHECK-NEXT:    store <4 x i8> zeroinitializer, ptr [[TMP0]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
+; CHECK:       [[VEC_EPILOG_ITER_CHECK]]:
+; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), [[N_VEC]]
+; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
+; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
+; CHECK:       [[VEC_EPILOG_PH]]:
+; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[N_MOD_VF1:%.*]] = urem i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), 4
+; CHECK-NEXT:    [[N_VEC2:%.*]] = sub i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), [[N_MOD_VF1]]
+; CHECK-NEXT:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
+; CHECK:       [[VEC_EPILOG_VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX3]]
+; CHECK-NEXT:    store <4 x i8> zeroinitializer, ptr [[TMP2]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT4]] = add nuw i64 [[INDEX3]], 4
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT4]], [[N_VEC2]]
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       [[VEC_EPILOG_MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[CMP_N5:%.*]] = icmp eq i64 sub (i64 0, i64 ptrtoint (ptr @end to i64)), [[N_VEC2]]
+; CHECK-NEXT:    br i1 [[CMP_N5]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
+; CHECK:       [[VEC_EPILOG_SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC2]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT:    store i8 0, ptr [[GEP_DST]], align 1
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], sub (i64 0, i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @end, i64 1) to i64))
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.dst = getelementptr i8, ptr %dst, i64 %iv
+  store i8 0, ptr %gep.dst, align 1  ; scalar byte store; the expected output above widens it to <4 x i8> in both the main and the epilogue vector body
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, sub (i64 0, i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @end, i64 1) to i64))  ; compares the pre-increment %iv (not %iv.next) against a constant expression built from the address of @end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
index af57967..b63ab8f 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
@@ -22,7 +22,6 @@ define void @foo(i32 %val, ptr dereferenceable(1024) %ptr) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 256)
 ; CHECK-NEXT:    [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
index f794620..cc3bda4 100644
--- a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
@@ -504,24 +504,35 @@ exit:
 define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_size_nofree_via_context(ptr noalias %p1, ptr noalias %p2) nosync {
 ; CHECK-LABEL: define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_size_nofree_via_context(
 ; CHECK-SAME: ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 1024) ]
 ; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 1024) ]
-; CHECK-NEXT:    br label %[[LOOP:.*]]
-; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]]
-; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[ARRAYIDX2]], align 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]]
-; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[TMP1]], align 1
-; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
-; CHECK-NEXT:    br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
-; CHECK:       [[LOOP_INC]]:
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX1]], 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
+; CHECK-NEXT:    [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
+; CHECK-NEXT:    [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024
+; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       [[MIDDLE_SPLIT]]:
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[LOOP_END:.*]]
+; CHECK:       [[VECTOR_EARLY_EXIT]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true)
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX1]], [[TMP7]]
+; CHECK-NEXT:    br label %[[LOOP_END]]
 ; CHECK:       [[LOOP_END]]:
-; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[INDEX1]], %[[LOOP]] ], [ -1, %[[LOOP_INC]] ]
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ]
 ; CHECK-NEXT:    ret i64 [[RETVAL]]
 ;
 entry:
diff --git a/llvm/test/Transforms/LowerTypeTests/simple.ll b/llvm/test/Transforms/LowerTypeTests/simple.ll
index 6fb8f6f..173a6ae 100644
--- a/llvm/test/Transforms/LowerTypeTests/simple.ll
+++ b/llvm/test/Transforms/LowerTypeTests/simple.ll
@@ -56,7 +56,7 @@ define i1 @foo(ptr %p) {
 
   ; CHECK: [[R8:%[^ ]*]] = getelementptr i8, ptr @bits_use.{{[0-9]*}}, i32 [[R5]]
   ; CHECK: [[R9:%[^ ]*]] = load i8, ptr [[R8]]
-  ; CHECK: [[R10:%[^ ]*]] = and i8 [[R9]], 1
+  ; CHECK: [[R10:%[^ ]*]] = and i8 [[R9]], ptrtoint (ptr inttoptr (i8 1 to ptr) to i8)
   ; CHECK: [[R11:%[^ ]*]] = icmp ne i8 [[R10]], 0
 
   ; CHECK: [[R16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[R11]], {{%[^ ]*}} ]
@@ -91,7 +91,7 @@ define i1 @baz(ptr %p) {
 
   ; CHECK: [[T8:%[^ ]*]] = getelementptr i8, ptr @bits_use{{(\.[0-9]*)?}}, i32 [[T5]]
   ; CHECK: [[T9:%[^ ]*]] = load i8, ptr [[T8]]
-  ; CHECK: [[T10:%[^ ]*]] = and i8 [[T9]], 2
+  ; CHECK: [[T10:%[^ ]*]] = and i8 [[T9]], ptrtoint (ptr inttoptr (i8 2 to ptr) to i8)
   ; CHECK: [[T11:%[^ ]*]] = icmp ne i8 [[T10]], 0
 
   ; CHECK: [[T16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[T11]], {{%[^ ]*}} ]
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/extra-unroll-simplifications.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/extra-unroll-simplifications.ll
index d8fc42b..57dacd4 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/extra-unroll-simplifications.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/extra-unroll-simplifications.ll
@@ -14,7 +14,7 @@ define void @partial_unroll_forced(i32 %N, ptr %src, ptr noalias %dst) {
 ; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 1
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[N]], 1
-; CHECK-NEXT:    br i1 [[TMP0]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]], label [[LOOP_LATCH_PREHEADER_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[LOOP_LATCH_EPIL_PREHEADER:%.*]], label [[LOOP_LATCH_PREHEADER_NEW:%.*]]
 ; CHECK:       loop.latch.preheader.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483646
 ; CHECK-NEXT:    br label [[LOOP_LATCH:%.*]]
@@ -35,12 +35,14 @@ define void @partial_unroll_forced(i32 %N, ptr %src, ptr noalias %dst) {
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV]], 2
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_LOOPEXIT_UNR_LCSSA]], label [[LOOP_LATCH]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]], label [[LOOP_LATCH]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       exit.loopexit.unr-lcssa:
-; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[LOOP_LATCH_PREHEADER]] ], [ [[INDVARS_IV_NEXT_1]], [[LOOP_LATCH]] ]
 ; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[EXIT]], label [[LOOP_LATCH_EPIL:%.*]]
-; CHECK:       loop.latch.epil:
+; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[EXIT]], label [[LOOP_LATCH_EPIL_PREHEADER]]
+; CHECK:       loop.latch.epil.preheader:
+; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[LOOP_LATCH_PREHEADER]] ], [ [[INDVARS_IV_NEXT_1]], [[EXIT_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD4:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[LCMP_MOD4]])
 ; CHECK-NEXT:    [[SRC_IDX_EPIL:%.*]] = getelementptr <8 x half>, ptr [[SRC]], i64 [[INDVARS_IV_UNR]]
 ; CHECK-NEXT:    [[L_EPIL:%.*]] = load <8 x half>, ptr [[SRC_IDX_EPIL]], align 16
 ; CHECK-NEXT:    [[DST_IDX_EPIL:%.*]] = getelementptr <8 x half>, ptr [[DST]], i64 [[INDVARS_IV_UNR]]
@@ -84,7 +86,7 @@ define void @cse_matching_load_from_previous_unrolled_iteration(i32 %N, ptr %src
 ; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 1
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[N]], 1
-; CHECK-NEXT:    br i1 [[TMP0]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]], label [[LOOP_LATCH_PREHEADER_NEW:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[LOOP_LATCH_EPIL_PREHEADER:%.*]], label [[LOOP_LATCH_PREHEADER_NEW:%.*]]
 ; CHECK:       loop.latch.preheader.new:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483646
 ; CHECK-NEXT:    br label [[LOOP_LATCH:%.*]]
@@ -107,12 +109,14 @@ define void @cse_matching_load_from_previous_unrolled_iteration(i32 %N, ptr %src
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV]], 2
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_LOOPEXIT_UNR_LCSSA]], label [[LOOP_LATCH]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]], label [[LOOP_LATCH]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       exit.loopexit.unr-lcssa:
-; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[LOOP_LATCH_PREHEADER]] ], [ [[INDVARS_IV_NEXT_1]], [[LOOP_LATCH]] ]
 ; CHECK-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[EXIT]], label [[LOOP_LATCH_EPIL:%.*]]
-; CHECK:       loop.latch.epil:
+; CHECK-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[EXIT]], label [[LOOP_LATCH_EPIL_PREHEADER]]
+; CHECK:       loop.latch.epil.preheader:
+; CHECK-NEXT:    [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[LOOP_LATCH_PREHEADER]] ], [ [[INDVARS_IV_NEXT_1]], [[EXIT_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD4:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[LCMP_MOD4]])
 ; CHECK-NEXT:    [[GEP_SRC_12_EPIL:%.*]] = getelementptr <2 x i32>, ptr [[SRC_12]], i64 [[INDVARS_IV_UNR]]
 ; CHECK-NEXT:    [[L_12_EPIL:%.*]] = load <2 x i32>, ptr [[GEP_SRC_12_EPIL]], align 8
 ; CHECK-NEXT:    [[GEP_SRC_4_EPIL:%.*]] = getelementptr <2 x i32>, ptr [[SRC_4]], i64 [[INDVARS_IV_UNR]]
diff --git a/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll b/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll
new file mode 100644
index 0000000..caf7a80
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll
@@ -0,0 +1,447 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -O3 < %s | FileCheck %s
+
+target datalayout = "n64:32:16:8"
+
+define i8 @test(i8 %x) {
+; CHECK-LABEL: define range(i8 0, 53) i8 @test(
+; CHECK-SAME: i8 [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[START:.*:]]
+; CHECK-NEXT:    [[X_:%.*]] = tail call i8 @llvm.umin.i8(i8 [[X]], i8 52)
+; CHECK-NEXT:    ret i8 [[X_]]
+;
+start:
+  %_0 = alloca [1 x i8], align 1  ; result slot: each odd-numbered bbN arm stores one constant here and bb105 reads it back
+  %0 = icmp eq i8 %x, 0
+  br i1 %0, label %bb1, label %bb2
+
+bb1:                                              ; preds = %start
+  store i8 0, ptr %_0, align 1
+  br label %bb105
+
+bb2:                                              ; preds = %start
+  %1 = icmp eq i8 %x, 1
+  br i1 %1, label %bb3, label %bb4
+
+bb105:                                            ; preds = %bb104, %bb103, %bb101, %bb99, %bb97, %bb95, %bb93, %bb91, %bb89, %bb87, %bb85, %bb83, %bb81, %bb79, %bb77, %bb75, %bb73, %bb71, %bb69, %bb67, %bb65, %bb63, %bb61, %bb59, %bb57, %bb55, %bb53, %bb51, %bb49, %bb47, %bb45, %bb43, %bb41, %bb39, %bb37, %bb35, %bb33, %bb31, %bb29, %bb27, %bb25, %bb23, %bb21, %bb19, %bb17, %bb15, %bb13, %bb11, %bb9, %bb7, %bb5, %bb3, %bb1
+  %2 = load i8, ptr %_0, align 1  ; common exit: reload whichever constant the taken arm stored
+  ret i8 %2
+
+bb3:                                              ; preds = %bb2
+  store i8 1, ptr %_0, align 1
+  br label %bb105
+
+bb4:                                              ; preds = %bb2
+  %3 = icmp eq i8 %x, 2
+  br i1 %3, label %bb5, label %bb6
+
+bb5:                                              ; preds = %bb4
+  store i8 2, ptr %_0, align 1
+  br label %bb105
+
+bb6:                                              ; preds = %bb4
+  %4 = icmp eq i8 %x, 3
+  br i1 %4, label %bb7, label %bb8
+
+bb7:                                              ; preds = %bb6
+  store i8 3, ptr %_0, align 1
+  br label %bb105
+
+bb8:                                              ; preds = %bb6
+  %5 = icmp eq i8 %x, 4
+  br i1 %5, label %bb9, label %bb10
+
+bb9:                                              ; preds = %bb8
+  store i8 4, ptr %_0, align 1
+  br label %bb105
+
+bb10:                                             ; preds = %bb8
+  %6 = icmp eq i8 %x, 5
+  br i1 %6, label %bb11, label %bb12
+
+bb11:                                             ; preds = %bb10
+  store i8 5, ptr %_0, align 1
+  br label %bb105
+
+bb12:                                             ; preds = %bb10
+  %7 = icmp eq i8 %x, 6
+  br i1 %7, label %bb13, label %bb14
+
+bb13:                                             ; preds = %bb12
+  store i8 6, ptr %_0, align 1
+  br label %bb105
+
+bb14:                                             ; preds = %bb12
+  %8 = icmp eq i8 %x, 7
+  br i1 %8, label %bb15, label %bb16
+
+bb15:                                             ; preds = %bb14
+  store i8 7, ptr %_0, align 1
+  br label %bb105
+
+bb16:                                             ; preds = %bb14
+  %9 = icmp eq i8 %x, 8
+  br i1 %9, label %bb17, label %bb18
+
+bb17:                                             ; preds = %bb16
+  store i8 8, ptr %_0, align 1
+  br label %bb105
+
+bb18:                                             ; preds = %bb16
+  %10 = icmp eq i8 %x, 9
+  br i1 %10, label %bb19, label %bb20
+
+bb19:                                             ; preds = %bb18
+  store i8 9, ptr %_0, align 1
+  br label %bb105
+
+bb20:                                             ; preds = %bb18
+  %11 = icmp eq i8 %x, 10
+  br i1 %11, label %bb21, label %bb22
+
+bb21:                                             ; preds = %bb20
+  store i8 10, ptr %_0, align 1
+  br label %bb105
+
+bb22:                                             ; preds = %bb20
+  %12 = icmp eq i8 %x, 11
+  br i1 %12, label %bb23, label %bb24
+
+bb23:                                             ; preds = %bb22
+  store i8 11, ptr %_0, align 1
+  br label %bb105
+
+bb24:                                             ; preds = %bb22
+  %13 = icmp eq i8 %x, 12
+  br i1 %13, label %bb25, label %bb26
+
+bb25:                                             ; preds = %bb24
+  store i8 12, ptr %_0, align 1
+  br label %bb105
+
+bb26:                                             ; preds = %bb24
+  %14 = icmp eq i8 %x, 13
+  br i1 %14, label %bb27, label %bb28
+
+bb27:                                             ; preds = %bb26
+  store i8 13, ptr %_0, align 1
+  br label %bb105
+
+bb28:                                             ; preds = %bb26
+  %15 = icmp eq i8 %x, 14
+  br i1 %15, label %bb29, label %bb30
+
+bb29:                                             ; preds = %bb28
+  store i8 14, ptr %_0, align 1
+  br label %bb105
+
+bb30:                                             ; preds = %bb28
+  %16 = icmp eq i8 %x, 15
+  br i1 %16, label %bb31, label %bb32
+
+bb31:                                             ; preds = %bb30
+  store i8 15, ptr %_0, align 1
+  br label %bb105
+
+bb32:                                             ; preds = %bb30
+  %17 = icmp eq i8 %x, 16
+  br i1 %17, label %bb33, label %bb34
+
+bb33:                                             ; preds = %bb32
+  store i8 16, ptr %_0, align 1
+  br label %bb105
+
+bb34:                                             ; preds = %bb32
+  %18 = icmp eq i8 %x, 17
+  br i1 %18, label %bb35, label %bb36
+
+bb35:                                             ; preds = %bb34
+  store i8 17, ptr %_0, align 1
+  br label %bb105
+
+bb36:                                             ; preds = %bb34
+  %19 = icmp eq i8 %x, 18
+  br i1 %19, label %bb37, label %bb38
+
+bb37:                                             ; preds = %bb36
+  store i8 18, ptr %_0, align 1
+  br label %bb105
+
+bb38:                                             ; preds = %bb36
+  %20 = icmp eq i8 %x, 19
+  br i1 %20, label %bb39, label %bb40
+
+bb39:                                             ; preds = %bb38
+  store i8 19, ptr %_0, align 1
+  br label %bb105
+
+bb40:                                             ; preds = %bb38
+  %21 = icmp eq i8 %x, 20
+  br i1 %21, label %bb41, label %bb42
+
+bb41:                                             ; preds = %bb40
+  store i8 20, ptr %_0, align 1
+  br label %bb105
+
+bb42:                                             ; preds = %bb40
+  %22 = icmp eq i8 %x, 21
+  br i1 %22, label %bb43, label %bb44
+
+bb43:                                             ; preds = %bb42
+  store i8 21, ptr %_0, align 1
+  br label %bb105
+
+bb44:                                             ; preds = %bb42
+  %23 = icmp eq i8 %x, 22
+  br i1 %23, label %bb45, label %bb46
+
+bb45:                                             ; preds = %bb44
+  store i8 22, ptr %_0, align 1
+  br label %bb105
+
+bb46:                                             ; preds = %bb44
+  %24 = icmp eq i8 %x, 23
+  br i1 %24, label %bb47, label %bb48
+
+bb47:                                             ; preds = %bb46
+  store i8 23, ptr %_0, align 1
+  br label %bb105
+
+bb48:                                             ; preds = %bb46
+  %25 = icmp eq i8 %x, 24
+  br i1 %25, label %bb49, label %bb50
+
+bb49:                                             ; preds = %bb48
+  store i8 24, ptr %_0, align 1
+  br label %bb105
+
+bb50:                                             ; preds = %bb48
+  %26 = icmp eq i8 %x, 25
+  br i1 %26, label %bb51, label %bb52
+
+bb51:                                             ; preds = %bb50
+  store i8 25, ptr %_0, align 1
+  br label %bb105
+
+bb52:                                             ; preds = %bb50
+  %27 = icmp eq i8 %x, 26
+  br i1 %27, label %bb53, label %bb54
+
+bb53:                                             ; preds = %bb52
+  store i8 26, ptr %_0, align 1
+  br label %bb105
+
+bb54:                                             ; preds = %bb52
+  %28 = icmp eq i8 %x, 27
+  br i1 %28, label %bb55, label %bb56
+
+bb55:                                             ; preds = %bb54
+  store i8 27, ptr %_0, align 1
+  br label %bb105
+
+bb56:                                             ; preds = %bb54
+  %29 = icmp eq i8 %x, 28
+  br i1 %29, label %bb57, label %bb58
+
+bb57:                                             ; preds = %bb56
+  store i8 28, ptr %_0, align 1
+  br label %bb105
+
+bb58:                                             ; preds = %bb56
+  %30 = icmp eq i8 %x, 29
+  br i1 %30, label %bb59, label %bb60
+
+bb59:                                             ; preds = %bb58
+  store i8 29, ptr %_0, align 1
+  br label %bb105
+
+bb60:                                             ; preds = %bb58
+  %31 = icmp eq i8 %x, 30
+  br i1 %31, label %bb61, label %bb62
+
+bb61:                                             ; preds = %bb60
+  store i8 30, ptr %_0, align 1
+  br label %bb105
+
+bb62:                                             ; preds = %bb60
+  %32 = icmp eq i8 %x, 31
+  br i1 %32, label %bb63, label %bb64
+
+bb63:                                             ; preds = %bb62
+  store i8 31, ptr %_0, align 1
+  br label %bb105
+
+bb64:                                             ; preds = %bb62
+  %33 = icmp eq i8 %x, 32
+  br i1 %33, label %bb65, label %bb66
+
+bb65:                                             ; preds = %bb64
+  store i8 32, ptr %_0, align 1
+  br label %bb105
+
+bb66:                                             ; preds = %bb64
+  %34 = icmp eq i8 %x, 33
+  br i1 %34, label %bb67, label %bb68
+
+bb67:                                             ; preds = %bb66
+  store i8 33, ptr %_0, align 1
+  br label %bb105
+
+bb68:                                             ; preds = %bb66
+  %35 = icmp eq i8 %x, 34
+  br i1 %35, label %bb69, label %bb70
+
+bb69:                                             ; preds = %bb68
+  store i8 34, ptr %_0, align 1
+  br label %bb105
+
+bb70:                                             ; preds = %bb68
+  %36 = icmp eq i8 %x, 35
+  br i1 %36, label %bb71, label %bb72
+
+bb71:                                             ; preds = %bb70
+  store i8 35, ptr %_0, align 1
+  br label %bb105
+
+bb72:                                             ; preds = %bb70
+  %37 = icmp eq i8 %x, 36
+  br i1 %37, label %bb73, label %bb74
+
+bb73:                                             ; preds = %bb72
+  store i8 36, ptr %_0, align 1
+  br label %bb105
+
+bb74:                                             ; preds = %bb72
+  %38 = icmp eq i8 %x, 37
+  br i1 %38, label %bb75, label %bb76
+
+bb75:                                             ; preds = %bb74
+  store i8 37, ptr %_0, align 1
+  br label %bb105
+
+bb76:                                             ; preds = %bb74
+  %39 = icmp eq i8 %x, 38
+  br i1 %39, label %bb77, label %bb78
+
+bb77:                                             ; preds = %bb76
+  store i8 38, ptr %_0, align 1
+  br label %bb105
+
+bb78:                                             ; preds = %bb76
+  %40 = icmp eq i8 %x, 39
+  br i1 %40, label %bb79, label %bb80
+
+bb79:                                             ; preds = %bb78
+  store i8 39, ptr %_0, align 1
+  br label %bb105
+
+bb80:                                             ; preds = %bb78
+  %41 = icmp eq i8 %x, 40
+  br i1 %41, label %bb81, label %bb82
+
+bb81:                                             ; preds = %bb80
+  store i8 40, ptr %_0, align 1
+  br label %bb105
+
+bb82:                                             ; preds = %bb80
+  %42 = icmp eq i8 %x, 41
+  br i1 %42, label %bb83, label %bb84
+
+bb83:                                             ; preds = %bb82
+  store i8 41, ptr %_0, align 1
+  br label %bb105
+
+bb84:                                             ; preds = %bb82
+  %43 = icmp eq i8 %x, 42
+  br i1 %43, label %bb85, label %bb86
+
+bb85:                                             ; preds = %bb84
+  store i8 42, ptr %_0, align 1
+  br label %bb105
+
+bb86:                                             ; preds = %bb84
+  %44 = icmp eq i8 %x, 43
+  br i1 %44, label %bb87, label %bb88
+
+bb87:                                             ; preds = %bb86
+  store i8 43, ptr %_0, align 1
+  br label %bb105
+
+bb88:                                             ; preds = %bb86
+  %45 = icmp eq i8 %x, 44
+  br i1 %45, label %bb89, label %bb90
+
+bb89:                                             ; preds = %bb88
+  store i8 44, ptr %_0, align 1
+  br label %bb105
+
+bb90:                                             ; preds = %bb88
+  %46 = icmp eq i8 %x, 45
+  br i1 %46, label %bb91, label %bb92
+
+bb91:                                             ; preds = %bb90
+  store i8 45, ptr %_0, align 1
+  br label %bb105
+
+bb92:                                             ; preds = %bb90
+  %47 = icmp eq i8 %x, 46
+  br i1 %47, label %bb93, label %bb94
+
+bb93:                                             ; preds = %bb92
+  store i8 46, ptr %_0, align 1
+  br label %bb105
+
+bb94:                                             ; preds = %bb92
+  %48 = icmp eq i8 %x, 47
+  br i1 %48, label %bb95, label %bb96
+
+bb95:                                             ; preds = %bb94
+  store i8 47, ptr %_0, align 1
+  br label %bb105
+
+bb96:                                             ; preds = %bb94
+  %49 = icmp eq i8 %x, 48
+  br i1 %49, label %bb97, label %bb98
+
+bb97:                                             ; preds = %bb96
+  store i8 48, ptr %_0, align 1
+  br label %bb105
+
+bb98:                                             ; preds = %bb96
+  %50 = icmp eq i8 %x, 49
+  br i1 %50, label %bb99, label %bb100
+
+bb99:                                             ; preds = %bb98
+  store i8 49, ptr %_0, align 1
+  br label %bb105
+
+bb100:                                            ; preds = %bb98
+  %51 = icmp eq i8 %x, 50
+  br i1 %51, label %bb101, label %bb102
+
+bb101:                                            ; preds = %bb100
+  store i8 50, ptr %_0, align 1
+  br label %bb105
+
+bb102:                                            ; preds = %bb100
+  %52 = icmp eq i8 %x, 51
+  br i1 %52, label %bb103, label %bb104
+
+bb103:                                            ; preds = %bb102
+  store i8 51, ptr %_0, align 1
+  br label %bb105
+
+bb104:                                            ; preds = %bb102
+  store i8 52, ptr %_0, align 1  ; fallthrough arm: reached for every %x not in 0..51, so the whole chain computes umin(%x, 52), matching the expected output above
+  br label %bb105
+}
+
+define i8 @test2(i8 %x) {
+; CHECK-LABEL: define range(i8 0, 53) i8 @test2(
+; CHECK-SAME: i8 [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i8 @test(i8 [[X]])
+; CHECK-NEXT:    ret i8 [[CALL]]
+;
+  %call = call i8 @test(i8 %x)  ; the expected output above keeps this as a tail call to @test and gives @test2 the same range(i8 0, 53) return attribute
+  ret i8 %call
+}
diff --git a/llvm/test/Transforms/SCCP/binaryops-constexprs.ll b/llvm/test/Transforms/SCCP/binaryops-constexprs.ll
index 31d816c..bf4a366 100644
--- a/llvm/test/Transforms/SCCP/binaryops-constexprs.ll
+++ b/llvm/test/Transforms/SCCP/binaryops-constexprs.ll
@@ -8,10 +8,12 @@ define void @and_constexpr(i32 %a) {
 ; CHECK-LABEL: @and_constexpr(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @use.i32(i32 0)
-; CHECK-NEXT:    [[AND_2:%.*]] = and i32 20, [[A:%.*]]
+; CHECK-NEXT:    [[AND_2:%.*]] = and i32 ptrtoint (ptr inttoptr (i32 20 to ptr) to i32), [[A:%.*]]
 ; CHECK-NEXT:    call void @use.i32(i32 [[AND_2]])
-; CHECK-NEXT:    call void @use.i1(i1 true)
-; CHECK-NEXT:    call void @use.i1(i1 false)
+; CHECK-NEXT:    [[TRUE_1:%.*]] = icmp ne i32 [[AND_2]], 100
+; CHECK-NEXT:    call void @use.i1(i1 [[TRUE_1]])
+; CHECK-NEXT:    [[FALSE_1:%.*]] = icmp eq i32 [[AND_2]], 100
+; CHECK-NEXT:    call void @use.i1(i1 [[FALSE_1]])
 ; CHECK-NEXT:    [[COND_1:%.*]] = icmp eq i32 [[AND_2]], 10
 ; CHECK-NEXT:    call void @use.i1(i1 [[COND_1]])
 ; CHECK-NEXT:    call void @use.i32(i32 4)
@@ -38,7 +40,7 @@ define void @add_constexpr(i32 %a) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i32 0, [[A:%.*]]
 ; CHECK-NEXT:    call void @use.i32(i32 [[ADD_1]])
-; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 20, [[A]]
+; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 ptrtoint (ptr inttoptr (i32 20 to ptr) to i32), [[A]]
 ; CHECK-NEXT:    call void @use.i32(i32 [[ADD_2]])
 ; CHECK-NEXT:    [[COND_1:%.*]] = icmp ne i32 [[ADD_2]], 100
 ; CHECK-NEXT:    call void @use.i1(i1 [[COND_1]])
@@ -46,7 +48,7 @@ define void @add_constexpr(i32 %a) {
 ; CHECK-NEXT:    call void @use.i1(i1 [[COND_2]])
 ; CHECK-NEXT:    [[COND_3:%.*]] = icmp eq i32 [[ADD_2]], 10
 ; CHECK-NEXT:    call void @use.i1(i1 [[COND_3]])
-; CHECK-NEXT:    call void @use.i32(i32 120)
+; CHECK-NEXT:    call void @use.i32(i32 add (i32 ptrtoint (ptr inttoptr (i32 20 to ptr) to i32), i32 ptrtoint (ptr inttoptr (i32 100 to ptr) to i32)))
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -69,7 +71,7 @@ define void @mul_constexpr(i32 %a) {
 ; CHECK-LABEL: @mul_constexpr(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @use.i32(i32 0)
-; CHECK-NEXT:    [[MUL_2:%.*]] = mul i32 20, [[A:%.*]]
+; CHECK-NEXT:    [[MUL_2:%.*]] = mul i32 ptrtoint (ptr inttoptr (i32 20 to ptr) to i32), [[A:%.*]]
 ; CHECK-NEXT:    call void @use.i32(i32 [[MUL_2]])
 ; CHECK-NEXT:    [[COND_1:%.*]] = icmp ne i32 [[MUL_2]], 100
 ; CHECK-NEXT:    call void @use.i1(i1 [[COND_1]])
@@ -77,7 +79,8 @@ define void @mul_constexpr(i32 %a) {
 ; CHECK-NEXT:    call void @use.i1(i1 [[COND_2]])
 ; CHECK-NEXT:    [[COND_3:%.*]] = icmp eq i32 [[MUL_2]], 10
 ; CHECK-NEXT:    call void @use.i1(i1 [[COND_3]])
-; CHECK-NEXT:    call void @use.i32(i32 2000)
+; CHECK-NEXT:    [[MUL_3:%.*]] = mul i32 ptrtoint (ptr inttoptr (i32 20 to ptr) to i32), ptrtoint (ptr inttoptr (i32 100 to ptr) to i32)
+; CHECK-NEXT:    call void @use.i32(i32 [[MUL_3]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -100,13 +103,16 @@ define void @udiv_constexpr(i32 %a) {
 ; CHECK-LABEL: @udiv_constexpr(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @use.i32(i32 0)
-; CHECK-NEXT:    [[UDIV_2:%.*]] = udiv i32 20, [[A:%.*]]
+; CHECK-NEXT:    [[UDIV_2:%.*]] = udiv i32 ptrtoint (ptr inttoptr (i32 20 to ptr) to i32), [[A:%.*]]
 ; CHECK-NEXT:    call void @use.i32(i32 [[UDIV_2]])
-; CHECK-NEXT:    call void @use.i1(i1 true)
-; CHECK-NEXT:    call void @use.i1(i1 false)
+; CHECK-NEXT:    [[TRUE_1:%.*]] = icmp ne i32 [[UDIV_2]], 100
+; CHECK-NEXT:    call void @use.i1(i1 [[TRUE_1]])
+; CHECK-NEXT:    [[FALSE_1:%.*]] = icmp eq i32 [[UDIV_2]], 50
+; CHECK-NEXT:    call void @use.i1(i1 [[FALSE_1]])
 ; CHECK-NEXT:    [[COND_1:%.*]] = icmp eq i32 [[UDIV_2]], 10
 ; CHECK-NEXT:    call void @use.i1(i1 [[COND_1]])
-; CHECK-NEXT:    call void @use.i32(i32 0)
+; CHECK-NEXT:    [[UDIV_3:%.*]] = udiv i32 ptrtoint (ptr inttoptr (i32 20 to ptr) to i32), ptrtoint (ptr inttoptr (i32 100 to ptr) to i32)
+; CHECK-NEXT:    call void @use.i32(i32 [[UDIV_3]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
index cf62fd5..a8880274 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
@@ -4,21 +4,14 @@
 define void @test_add_sdiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 ; CHECK-LABEL: @test_add_sdiv(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 2
-; CHECK-NEXT:    [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3
-; CHECK-NEXT:    [[V2:%.*]] = load i32, ptr [[GEP2_2]], align 4
-; CHECK-NEXT:    [[V3:%.*]] = load i32, ptr [[GEP2_3]], align 4
-; CHECK-NEXT:    [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
-; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2
+; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1>
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]]
-; CHECK-NEXT:    [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP8:%.*]] = sdiv <4 x i32> [[TMP0]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]]
 ; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr [[ARR3:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -58,21 +51,14 @@ entry:
 define void @test_add_udiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 ; CHECK-LABEL: @test_add_udiv(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 2
-; CHECK-NEXT:    [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3
-; CHECK-NEXT:    [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4
-; CHECK-NEXT:    [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4
-; CHECK-NEXT:    [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
-; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2
+; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1>
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]]
-; CHECK-NEXT:    [[RES2:%.*]] = udiv i32 [[V2]], [[Y2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP8:%.*]] = udiv <4 x i32> [[TMP0]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]]
 ; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr [[ARR2:%.*]], align 4
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/SimplifyCFG/indirectbr.ll b/llvm/test/Transforms/SimplifyCFG/indirectbr.ll
index 87d8b39..2fa36b0 100644
--- a/llvm/test/Transforms/SimplifyCFG/indirectbr.ll
+++ b/llvm/test/Transforms/SimplifyCFG/indirectbr.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -S -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s
 
 ; SimplifyCFG should eliminate redundant indirectbr edges.
@@ -8,7 +8,11 @@ declare void @A()
 declare void @B(i32)
 declare void @C()
 
-define void @indbrtest0(ptr %P, ptr %Q) {
+;.
+; CHECK: @anchor = constant [13 x ptr] [ptr blockaddress(@indbrtest3, %L1), ptr blockaddress(@indbrtest3, %L2), ptr inttoptr (i32 1 to ptr), ptr blockaddress(@indbrtest4, %L1), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr), ptr inttoptr (i32 1 to ptr)]
+; CHECK: @xblkx.bbs = internal unnamed_addr constant [9 x ptr] [ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %v2j), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %xlab4x), ptr blockaddress(@indbrtest7, %v2j)]
+;.
+define void @indbrtest0(ptr %P, ptr %Q) !prof !0 {
 ; CHECK-LABEL: @indbrtest0(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store ptr blockaddress(@indbrtest0, [[BB0:%.*]]), ptr [[P:%.*]], align 8
@@ -16,7 +20,7 @@ define void @indbrtest0(ptr %P, ptr %Q) {
 ; CHECK-NEXT:    store ptr blockaddress(@indbrtest0, [[BB2:%.*]]), ptr [[P]], align 8
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    [[T:%.*]] = load ptr, ptr [[Q:%.*]], align 8
-; CHECK-NEXT:    indirectbr ptr [[T]], [label [[BB0]], label [[BB1]], label %BB2]
+; CHECK-NEXT:    indirectbr ptr [[T]], [label [[BB0]], label [[BB1]], label %BB2], !prof [[PROF1:![0-9]+]]
 ; CHECK:       BB0:
 ; CHECK-NEXT:    call void @A()
 ; CHECK-NEXT:    br label [[BB1]]
@@ -36,7 +40,7 @@ entry:
   store ptr blockaddress(@indbrtest0, %BB2), ptr %P
   call void @foo()
   %t = load ptr, ptr %Q
-  indirectbr ptr %t, [label %BB0, label %BB1, label %BB2, label %BB0, label %BB1, label %BB2]
+  indirectbr ptr %t, [label %BB0, label %BB1, label %BB2, label %BB0, label %BB1, label %BB2], !prof !1
 BB0:
   call void @A()
   br label %BB1
@@ -103,10 +107,10 @@ BB0:
 ; SimplifyCFG should turn the indirectbr into a conditional branch on the
 ; condition of the select.
 
-define void @indbrtest3(i1 %cond, ptr %address) nounwind {
+define void @indbrtest3(i1 %cond, ptr %address) nounwind !prof !0 {
 ; CHECK-LABEL: @indbrtest3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[COND:%.*]], label [[L1:%.*]], label [[L2:%.*]]
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[L1:%.*]], label [[L2:%.*]], !prof [[PROF2:![0-9]+]]
 ; CHECK:       common.ret:
 ; CHECK-NEXT:    ret void
 ; CHECK:       L1:
@@ -117,8 +121,8 @@ define void @indbrtest3(i1 %cond, ptr %address) nounwind {
 ; CHECK-NEXT:    br label [[COMMON_RET]]
 ;
 entry:
-  %indirect.goto.dest = select i1 %cond, ptr blockaddress(@indbrtest3, %L1), ptr blockaddress(@indbrtest3, %L2)
-  indirectbr ptr %indirect.goto.dest, [label %L1, label %L2, label %L3]
+  %indirect.goto.dest = select i1 %cond, ptr blockaddress(@indbrtest3, %L1), ptr blockaddress(@indbrtest3, %L2), !prof !2
+  indirectbr ptr %indirect.goto.dest, [label %L1, label %L2, label %L3], !prof !3
 
 L1:
   call void @A()
@@ -385,3 +389,15 @@ declare i32 @xfunc5x()
 declare i8 @xfunc7x()
 declare i32 @xselectorx()
 declare i32 @xactionx()
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 3, i32 5, i32 7, i32 11, i32 13, i32 17}
+!2 = !{!"branch_weights", i32 3, i32 5}
+!3 = !{!"branch_weights", i32 3, i32 5, i32 7}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 14, i32 18, i32 24}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 3, i32 5}
+;.
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll b/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll
new file mode 100644
index 0000000..9bbe3eb
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+
+declare ptr @_Znwm(i64)
+
+define ptr @test_merge_alloc_token_same(i1 %b) {
+; CHECK-LABEL: define ptr @test_merge_alloc_token_same(
+; CHECK-SAME: i1 [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CALL:%.*]] = call ptr @_Znwm(i64 4), !alloc_token [[META0:![0-9]+]]
+; CHECK-NEXT:    ret ptr [[CALL]]
+;
+entry:
+  br i1 %b, label %if.then, label %if.else
+
+if.then:
+  %call = call ptr @_Znwm(i64 4), !alloc_token !0
+  br label %if.end
+
+if.else:
+  %call1 = call ptr @_Znwm(i64 4), !alloc_token !0
+  br label %if.end
+
+if.end:
+  %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ]
+  ret ptr %x.0
+}
+
+define ptr @test_merge_alloc_token_different(i1 %b) {
+; CHECK-LABEL: define ptr @test_merge_alloc_token_different(
+; CHECK-SAME: i1 [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CALL:%.*]] = call ptr @_Znwm(i64 4)
+; CHECK-NEXT:    ret ptr [[CALL]]
+;
+entry:
+  br i1 %b, label %if.then, label %if.else
+
+if.then:
+  %call = call ptr @_Znwm(i64 4), !alloc_token !0
+  br label %if.end
+
+if.else:
+  %call1 = call ptr @_Znwm(i64 4), !alloc_token !1
+  br label %if.end
+
+if.end:
+  %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ]
+  ret ptr %x.0
+}
+
+define ptr @test_merge_alloc_token_some1(i1 %b) {
+; CHECK-LABEL: define ptr @test_merge_alloc_token_some1(
+; CHECK-SAME: i1 [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CALL:%.*]] = call ptr @_Znwm(i64 4)
+; CHECK-NEXT:    ret ptr [[CALL]]
+;
+entry:
+  br i1 %b, label %if.then, label %if.else
+
+if.then:
+  %call = call ptr @_Znwm(i64 4), !alloc_token !0
+  br label %if.end
+
+if.else:
+  %call1 = call ptr @_Znwm(i64 4)
+  br label %if.end
+
+if.end:
+  %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ]
+  ret ptr %x.0
+}
+
+define ptr @test_merge_alloc_token_some2(i1 %b) {
+; CHECK-LABEL: define ptr @test_merge_alloc_token_some2(
+; CHECK-SAME: i1 [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CALL:%.*]] = call ptr @_Znwm(i64 4)
+; CHECK-NEXT:    ret ptr [[CALL]]
+;
+entry:
+  br i1 %b, label %if.then, label %if.else
+
+if.then:
+  %call = call ptr @_Znwm(i64 4)
+  br label %if.end
+
+if.else:
+  %call1 = call ptr @_Znwm(i64 4), !alloc_token !0
+  br label %if.end
+
+if.end:
+  %x.0 = phi ptr [ %call, %if.then ], [ %call1, %if.else ]
+  ret ptr %x.0
+}
+
+!0 = !{!"int"}
+!1 = !{!"char[4]"}
+;.
+; CHECK: [[META0]] = !{!"int"}
+;.
diff --git a/llvm/test/tools/llvm-ar/extract.test b/llvm/test/tools/llvm-ar/extract.test
index bf46cc0..f8be7fd 100644
--- a/llvm/test/tools/llvm-ar/extract.test
+++ b/llvm/test/tools/llvm-ar/extract.test
@@ -1,5 +1,4 @@
 ## Test extract operation.
-# XFAIL: target={{.*}}-darwin{{.*}}
 
 # RUN: rm -rf %t && mkdir -p %t/extracted/
 
@@ -9,7 +8,7 @@
 
 # RUN: echo filea > %t/a.txt
 # RUN: echo fileb > %t/b.txt
-# RUN: llvm-ar rc %t/archive.a %t/a.txt %t/b.txt
+# RUN: llvm-ar rc --format=gnu %t/archive.a %t/a.txt %t/b.txt
 
 ## Single member:
 # RUN: cd %t/extracted && llvm-ar xv %t/archive.a a.txt | FileCheck %s --check-prefix=A
diff --git a/llvm/test/tools/llvm-ar/print.test b/llvm/test/tools/llvm-ar/print.test
index 997c05f..c104fb4 100644
--- a/llvm/test/tools/llvm-ar/print.test
+++ b/llvm/test/tools/llvm-ar/print.test
@@ -1,12 +1,11 @@
 ## Test Print output
-# XFAIL: target={{.*}}-darwin{{.*}}
 
 # RUN: rm -rf %t && mkdir -p %t
 # RUN: echo file1 > %t/1.txt
 # RUN: echo file2 > %t/2.txt
 # RUN: echo file3 > %t/3.txt
 
-# RUN: llvm-ar -rc %t/archive.a %t/1.txt %t/2.txt %t/3.txt
+# RUN: llvm-ar -rc --format=gnu %t/archive.a %t/1.txt %t/2.txt %t/3.txt
 
 ## Print empty archive:
 # RUN: llvm-ar --format=gnu  cr %t/empty.a
diff --git a/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s b/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s
index c8a5746..da83c54 100644
--- a/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s
+++ b/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s
@@ -2,8 +2,8 @@ REQUIRES: aarch64-registered-target
 // Flakey on SVE buildbots, disabled pending invesgitation.
 UNSUPPORTED: target={{.*}}
 
-RUN: llvm-exegesis -mtriple=aarch64 -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FMOVWSr --benchmark-phase=assemble-measured-code 2>&1
-RUN: llvm-objdump -d %d > %t.s
+RUN: llvm-exegesis -mtriple=aarch64 -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%t.obj --opcode-name=FMOVWSr --benchmark-phase=assemble-measured-code 2>&1
+RUN: llvm-objdump -d %t.obj > %t.s
 RUN: FileCheck %s < %t.s
 
 CHECK-NOT: ld{{[1-4]}}
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s
index bdc02d4..a540d7d 100644
--- a/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s
+++ b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s
@@ -4,7 +4,7 @@ AMOAND_D:      ---
 AMOAND_D-NEXT: mode: latency
 AMOAND_D-NEXT: key:
 AMOAND_D-NEXT:   instructions:
-AMOAND_D-NEXT:     - 'AMOAND_D [[RE01:X[0-9]+]] X10 [[RE01:X[0-9]+]]'
+AMOAND_D-NEXT:     - 'AMOAND_D [[RE01:X[0-9]+]] [[RE01:X[0-9]+]] X10'
 AMOAND_D-NEXT: config: ''
 AMOAND_D-NEXT: register_initial_values:
 AMOAND_D-NEXT: - '[[RE01:X[0-9]+]]=0x0'
@@ -16,7 +16,7 @@ AMOADD_W:      ---
 AMOADD_W-NEXT: mode: latency
 AMOADD_W-NEXT: key:
 AMOADD_W-NEXT:   instructions:
-AMOADD_W-NEXT:     - 'AMOADD_W [[RE02:X[0-9]+]] X10 [[RE02:X[0-9]+]]'
+AMOADD_W-NEXT:     - 'AMOADD_W [[RE02:X[0-9]+]] [[RE02:X[0-9]+]] X10'
 AMOADD_W-NEXT: config: ''
 AMOADD_W-NEXT: register_initial_values:
 AMOADD_W-NEXT: - '[[RE02:X[0-9]+]]=0x0'
@@ -28,7 +28,7 @@ AMOMAXU_D:      ---
 AMOMAXU_D-NEXT: mode: latency
 AMOMAXU_D-NEXT: key:
 AMOMAXU_D-NEXT:   instructions:
-AMOMAXU_D-NEXT:     - 'AMOMAXU_D [[RE03:X[0-9]+]] X10 [[RE03:X[0-9]+]]'
+AMOMAXU_D-NEXT:     - 'AMOMAXU_D [[RE03:X[0-9]+]] [[RE03:X[0-9]+]] X10'
 AMOMAXU_D-NEXT: config: ''
 AMOMAXU_D-NEXT: register_initial_values:
 AMOMAXU_D-NEXT: - '[[RE03:X[0-9]+]]=0x0'
@@ -40,7 +40,7 @@ AMOMIN_W:      ---
 AMOMIN_W-NEXT: mode: latency
 AMOMIN_W-NEXT: key:
 AMOMIN_W-NEXT:   instructions:
-AMOMIN_W-NEXT:     - 'AMOMIN_W [[RE04:X[0-9]+]] X10 [[RE04:X[0-9]+]]'
+AMOMIN_W-NEXT:     - 'AMOMIN_W [[RE04:X[0-9]+]] [[RE04:X[0-9]+]] X10'
 AMOMIN_W-NEXT: config: ''
 AMOMIN_W-NEXT: register_initial_values:
 AMOMIN_W-NEXT: - '[[RE04:X[0-9]+]]=0x0'
@@ -52,7 +52,7 @@ AMOXOR_D:      ---
 AMOXOR_D-NEXT: mode: latency
 AMOXOR_D-NEXT: key:
 AMOXOR_D-NEXT:   instructions:
-AMOXOR_D-NEXT:     - 'AMOXOR_D [[RE05:X[0-9]+]] X10 [[RE05:X[0-9]+]]'
+AMOXOR_D-NEXT:     - 'AMOXOR_D [[RE05:X[0-9]+]] [[RE05:X[0-9]+]] X10'
 AMOXOR_D-NEXT: config: ''
 AMOXOR_D-NEXT: register_initial_values:
 AMOXOR_D-NEXT: - '[[RE05:X[0-9]+]]=0x0'
diff --git a/llvm/test/tools/llvm-mca/RISCV/SiFive7/vector-fp.s b/llvm/test/tools/llvm-mca/RISCV/SiFive7/vector-fp.s
new file mode 100644
index 0000000..b20206f
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/RISCV/SiFive7/vector-fp.s
@@ -0,0 +1,4848 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=riscv64 -mcpu=sifive-x280 -instruction-tables=full -iterations=1 < %s | FileCheck %s
+
+# The legal (SEW, LMUL) pairs for FP on sifive-x280 are:
+# (e16, mf4) (e16, mf2) (e16, m1) (e16, m2) (e16, m4) (e16, m8)
+# (e32, mf2) (e32, m1) (e32, m2) (e32, m4) (e32, m8)
+# (e64, m1) (e64, m2) (e64, m4) (e64, m8)
+# Widening instructions do not have e64
+
+# Vector Single-Width FP
+vsetvli zero, zero, e16, mf4, tu, mu
+vfadd.vv v8, v16, v24
+vfadd.vf v8, v16, f8
+vfsub.vv v8, v16, v24
+vfsub.vf v8, v16, f8
+vfrsub.vf v8, v16, f8
+vfmul.vv v8, v16, v24
+vfmul.vf v8, v16, f8
+vfdiv.vv v8, v16, v24
+vfdiv.vf v8, v16, f8
+vfrdiv.vf v8, v16, f8
+vfmacc.vv v8, v16, v24
+vfmacc.vf v8, f8, v24
+vfnmacc.vv v8, v16, v24
+vfnmacc.vf v8, f8, v24
+vfmsac.vv v8, v16, v24
+vfmsac.vf v8, f8, v24
+vfnmsac.vv v8, v16, v24
+vfnmsac.vf v8, f8, v24
+vfmadd.vv v8, v16, v24
+vfmadd.vf v8, f8, v24
+vfnmadd.vv v8, v16, v24
+vfnmadd.vf v8, f8, v24
+vfmsub.vv v8, v16, v24
+vfmsub.vf v8, f8, v24
+vfnmsub.vv v8, v16, v24
+vfnmsub.vf v8, f8, v24
+vfsqrt.v v8, v24
+vfrsqrt7.v v8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e16, mf2, tu, mu
+vfadd.vv v8, v16, v24
+vfadd.vf v8, v16, f8
+vfsub.vv v8, v16, v24
+vfsub.vf v8, v16, f8
+vfrsub.vf v8, v16, f8
+vfmul.vv v8, v16, v24
+vfmul.vf v8, v16, f8
+vfdiv.vv v8, v16, v24
+vfdiv.vf v8, v16, f8
+vfrdiv.vf v8, v16, f8
+vfmacc.vv v8, v16, v24
+vfmacc.vf v8, f8, v24
+vfnmacc.vv v8, v16, v24
+vfnmacc.vf v8, f8, v24
+vfmsac.vv v8, v16, v24
+vfmsac.vf v8, f8, v24
+vfnmsac.vv v8, v16, v24
+vfnmsac.vf v8, f8, v24
+vfmadd.vv v8, v16, v24
+vfmadd.vf v8, f8, v24
+vfnmadd.vv v8, v16, v24
+vfnmadd.vf v8, f8, v24
+vfmsub.vv v8, v16, v24
+vfmsub.vf v8, f8, v24
+vfnmsub.vv v8, v16, v24
+vfnmsub.vf v8, f8, v24
+vfsqrt.v v8, v24
+vfrsqrt7.v v8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e16, m1, tu, mu
+vfadd.vv v8, v16, v24
+vfadd.vf v8, v16, f8
+vfsub.vv v8, v16, v24
+vfsub.vf v8, v16, f8
+vfrsub.vf v8, v16, f8
+vfmul.vv v8, v16, v24
+vfmul.vf v8, v16, f8
+vfdiv.vv v8, v16, v24
+vfdiv.vf v8, v16, f8
+vfrdiv.vf v8, v16, f8
+vfmacc.vv v8, v16, v24
+vfmacc.vf v8, f8, v24
+vfnmacc.vv v8, v16, v24
+vfnmacc.vf v8, f8, v24
+vfmsac.vv v8, v16, v24
+vfmsac.vf v8, f8, v24
+vfnmsac.vv v8, v16, v24
+vfnmsac.vf v8, f8, v24
+vfmadd.vv v8, v16, v24
+vfmadd.vf v8, f8, v24
+vfnmadd.vv v8, v16, v24
+vfnmadd.vf v8, f8, v24
+vfmsub.vv v8, v16, v24
+vfmsub.vf v8, f8, v24
+vfnmsub.vv v8, v16, v24
+vfnmsub.vf v8, f8, v24
+vfsqrt.v v8, v24
+vfrsqrt7.v v8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e16, m2, tu, mu
+vfadd.vv v8, v16, v24
+vfadd.vf v8, v16, f8
+vfsub.vv v8, v16, v24
+vfsub.vf v8, v16, f8
+vfrsub.vf v8, v16, f8
+vfmul.vv v8, v16, v24
+vfmul.vf v8, v16, f8
+vfdiv.vv v8, v16, v24
+vfdiv.vf v8, v16, f8
+vfrdiv.vf v8, v16, f8
+vfmacc.vv v8, v16, v24
+vfmacc.vf v8, f8, v24
+vfnmacc.vv v8, v16, v24
+vfnmacc.vf v8, f8, v24
+vfmsac.vv v8, v16, v24
+vfmsac.vf v8, f8, v24
+vfnmsac.vv v8, v16, v24
+vfnmsac.vf v8, f8, v24
+vfmadd.vv v8, v16, v24
+vfmadd.vf v8, f8, v24
+vfnmadd.vv v8, v16, v24
+vfnmadd.vf v8, f8, v24
+vfmsub.vv v8, v16, v24
+vfmsub.vf v8, f8, v24
+vfnmsub.vv v8, v16, v24
+vfnmsub.vf v8, f8, v24
+vfsqrt.v v8, v24
+vfrsqrt7.v v8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e16, m4, tu, mu
+vfadd.vv v8, v16, v24
+vfadd.vf v8, v16, f8
+vfsub.vv v8, v16, v24
+vfsub.vf v8, v16, f8
+vfrsub.vf v8, v16, f8
+vfmul.vv v8, v16, v24
+vfmul.vf v8, v16, f8
+vfdiv.vv v8, v16, v24
+vfdiv.vf v8, v16, f8
+vfrdiv.vf v8, v16, f8
+vfmacc.vv v8, v16, v24
+vfmacc.vf v8, f8, v24
+vfnmacc.vv v8, v16, v24
+vfnmacc.vf v8, f8, v24
+vfmsac.vv v8, v16, v24
+vfmsac.vf v8, f8, v24
+vfnmsac.vv v8, v16, v24
+vfnmsac.vf v8, f8, v24
+vfmadd.vv v8, v16, v24
+vfmadd.vf v8, f8, v24
+vfnmadd.vv v8, v16, v24
+vfnmadd.vf v8, f8, v24
+vfmsub.vv v8, v16, v24
+vfmsub.vf v8, f8, v24
+vfnmsub.vv v8, v16, v24
+vfnmsub.vf v8, f8, v24
+vfsqrt.v v8, v24
+vfrsqrt7.v v8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e16, m8, tu, mu
+vfadd.vv v8, v16, v24
+vfadd.vf v8, v16, f8
+vfsub.vv v8, v16, v24
+vfsub.vf v8, v16, f8
+vfrsub.vf v8, v16, f8
+vfmul.vv v8, v16, v24
+vfmul.vf v8, v16, f8
+vfdiv.vv v8, v16, v24
+vfdiv.vf v8, v16, f8
+vfrdiv.vf v8, v16, f8
+vfmacc.vv v8, v16, v24
+vfmacc.vf v8, f8, v24
+vfnmacc.vv v8, v16, v24
+vfnmacc.vf v8, f8, v24
+vfmsac.vv v8, v16, v24
+vfmsac.vf v8, f8, v24
+vfnmsac.vv v8, v16, v24
+vfnmsac.vf v8, f8, v24
+vfmadd.vv v8, v16, v24
+vfmadd.vf v8, f8, v24
+vfnmadd.vv v8, v16, v24
+vfnmadd.vf v8, f8, v24
+vfmsub.vv v8, v16, v24
+vfmsub.vf v8, f8, v24
+vfnmsub.vv v8, v16, v24
+vfnmsub.vf v8, f8, v24
+vfsqrt.v v8, v24
+vfrsqrt7.v v8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e32, mf2, tu, mu
+vfadd.vv v8, v16, v24
+vfadd.vf v8, v16, f8
+vfsub.vv v8, v16, v24
+vfsub.vf v8, v16, f8
+vfrsub.vf v8, v16, f8
+vfmul.vv v8, v16, v24
+vfmul.vf v8, v16, f8
+vfdiv.vv v8, v16, v24
+vfdiv.vf v8, v16, f8
+vfrdiv.vf v8, v16, f8
+vfmacc.vv v8, v16, v24
+vfmacc.vf v8, f8, v24
+vfnmacc.vv v8, v16, v24
+vfnmacc.vf v8, f8, v24
+vfmsac.vv v8, v16, v24
+vfmsac.vf v8, f8, v24
+vfnmsac.vv v8, v16, v24
+vfnmsac.vf v8, f8, v24
+vfmadd.vv v8, v16, v24
+vfmadd.vf v8, f8, v24
+vfnmadd.vv v8, v16, v24
+vfnmadd.vf v8, f8, v24
+vfmsub.vv v8, v16, v24
+vfmsub.vf v8, f8, v24
+vfnmsub.vv v8, v16, v24
+vfnmsub.vf v8, f8, v24
+vfsqrt.v v8, v24
+vfrsqrt7.v v8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e32, m1, tu, mu
+vfadd.vv v8, v16, v24
+vfadd.vf v8, v16, f8
+vfsub.vv v8, v16, v24
+vfsub.vf v8, v16, f8
+vfrsub.vf v8, v16, f8
+vfmul.vv v8, v16, v24
+vfmul.vf v8, v16, f8
+vfdiv.vv v8, v16, v24
+vfdiv.vf v8, v16, f8
+vfrdiv.vf v8, v16, f8
+vfmacc.vv v8, v16, v24
+vfmacc.vf v8, f8, v24
+vfnmacc.vv v8, v16, v24
+vfnmacc.vf v8, f8, v24
+vfmsac.vv v8, v16, v24
+vfmsac.vf v8, f8, v24
+vfnmsac.vv v8, v16, v24
+vfnmsac.vf v8, f8, v24
+vfmadd.vv v8, v16, v24
+vfmadd.vf v8, f8, v24
+vfnmadd.vv v8, v16, v24
+vfnmadd.vf v8, f8, v24
+vfmsub.vv v8, v16, v24
+vfmsub.vf v8, f8, v24
+vfnmsub.vv v8, v16, v24
+vfnmsub.vf v8, f8, v24
+vfsqrt.v v8, v24
+vfrsqrt7.v v8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e32, m2, tu, mu
+vfadd.vv v8, v16, v24
+vfadd.vf v8, v16, f8
+vfsub.vv v8, v16, v24
+vfsub.vf v8, v16, f8
+vfrsub.vf v8, v16, f8
+vfmul.vv v8, v16, v24
+vfmul.vf v8, v16, f8
+vfdiv.vv v8, v16, v24
+vfdiv.vf v8, v16, f8
+vfrdiv.vf v8, v16, f8
+vfmacc.vv v8, v16, v24
+vfmacc.vf v8, f8, v24
+vfnmacc.vv v8, v16, v24
+vfnmacc.vf v8, f8, v24
+vfmsac.vv v8, v16, v24
+vfmsac.vf v8, f8, v24
+vfnmsac.vv v8, v16, v24
+vfnmsac.vf v8, f8, v24
+vfmadd.vv v8, v16, v24
+vfmadd.vf v8, f8, v24
+vfnmadd.vv v8, v16, v24
+vfnmadd.vf v8, f8, v24
+vfmsub.vv v8, v16, v24
+vfmsub.vf v8, f8, v24
+vfnmsub.vv v8, v16, v24
+vfnmsub.vf v8, f8, v24
+vfsqrt.v v8, v24
+vfrsqrt7.v v8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e32, m4, tu, mu
+vfadd.vv v8, v16, v24
+vfadd.vf v8, v16, f8
+vfsub.vv v8, v16, v24
+vfsub.vf v8, v16, f8
+vfrsub.vf v8, v16, f8
+vfmul.vv v8, v16, v24
+vfmul.vf v8, v16, f8
+vfdiv.vv v8, v16, v24
+vfdiv.vf v8, v16, f8
+vfrdiv.vf v8, v16, f8
+vfmacc.vv v8, v16, v24
+vfmacc.vf v8, f8, v24
+vfnmacc.vv v8, v16, v24
+vfnmacc.vf v8, f8, v24
+vfmsac.vv v8, v16, v24
+vfmsac.vf v8, f8, v24
+vfnmsac.vv v8, v16, v24
+vfnmsac.vf v8, f8, v24
+vfmadd.vv v8, v16, v24
+vfmadd.vf v8, f8, v24
+vfnmadd.vv v8, v16, v24
+vfnmadd.vf v8, f8, v24
+vfmsub.vv v8, v16, v24
+vfmsub.vf v8, f8, v24
+vfnmsub.vv v8, v16, v24
+vfnmsub.vf v8, f8, v24
+vfsqrt.v v8, v24
+vfrsqrt7.v v8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e32, m8, tu, mu
+vfadd.vv v8, v16, v24
+vfadd.vf v8, v16, f8
+vfsub.vv v8, v16, v24
+vfsub.vf v8, v16, f8
+vfrsub.vf v8, v16, f8
+vfmul.vv v8, v16, v24
+vfmul.vf v8, v16, f8
+vfdiv.vv v8, v16, v24
+vfdiv.vf v8, v16, f8
+vfrdiv.vf v8, v16, f8
+vfmacc.vv v8, v16, v24
+vfmacc.vf v8, f8, v24
+vfnmacc.vv v8, v16, v24
+vfnmacc.vf v8, f8, v24
+vfmsac.vv v8, v16, v24
+vfmsac.vf v8, f8, v24
+vfnmsac.vv v8, v16, v24
+vfnmsac.vf v8, f8, v24
+vfmadd.vv v8, v16, v24
+vfmadd.vf v8, f8, v24
+vfnmadd.vv v8, v16, v24
+vfnmadd.vf v8, f8, v24
+vfmsub.vv v8, v16, v24
+vfmsub.vf v8, f8, v24
+vfnmsub.vv v8, v16, v24
+vfnmsub.vf v8, f8, v24
+vfsqrt.v v8, v24
+vfrsqrt7.v v8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e64, m1, tu, mu
+vfadd.vv v8, v16, v24
+vfadd.vf v8, v16, f8
+vfsub.vv v8, v16, v24
+vfsub.vf v8, v16, f8
+vfrsub.vf v8, v16, f8
+vfmul.vv v8, v16, v24
+vfmul.vf v8, v16, f8
+vfdiv.vv v8, v16, v24
+vfdiv.vf v8, v16, f8
+vfrdiv.vf v8, v16, f8
+vfmacc.vv v8, v16, v24
+vfmacc.vf v8, f8, v24
+vfnmacc.vv v8, v16, v24
+vfnmacc.vf v8, f8, v24
+vfmsac.vv v8, v16, v24
+vfmsac.vf v8, f8, v24
+vfnmsac.vv v8, v16, v24
+vfnmsac.vf v8, f8, v24
+vfmadd.vv v8, v16, v24
+vfmadd.vf v8, f8, v24
+vfnmadd.vv v8, v16, v24
+vfnmadd.vf v8, f8, v24
+vfmsub.vv v8, v16, v24
+vfmsub.vf v8, f8, v24
+vfnmsub.vv v8, v16, v24
+vfnmsub.vf v8, f8, v24
+vfsqrt.v v8, v24
+vfrsqrt7.v v8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e64, m2, tu, mu
+vfadd.vv v8, v16, v24
+vfadd.vf v8, v16, f8
+vfsub.vv v8, v16, v24
+vfsub.vf v8, v16, f8
+vfrsub.vf v8, v16, f8
+vfmul.vv v8, v16, v24
+vfmul.vf v8, v16, f8
+vfdiv.vv v8, v16, v24
+vfdiv.vf v8, v16, f8
+vfrdiv.vf v8, v16, f8
+vfmacc.vv v8, v16, v24
+vfmacc.vf v8, f8, v24
+vfnmacc.vv v8, v16, v24
+vfnmacc.vf v8, f8, v24
+vfmsac.vv v8, v16, v24
+vfmsac.vf v8, f8, v24
+vfnmsac.vv v8, v16, v24
+vfnmsac.vf v8, f8, v24
+vfmadd.vv v8, v16, v24
+vfmadd.vf v8, f8, v24
+vfnmadd.vv v8, v16, v24
+vfnmadd.vf v8, f8, v24
+vfmsub.vv v8, v16, v24
+vfmsub.vf v8, f8, v24
+vfnmsub.vv v8, v16, v24
+vfnmsub.vf v8, f8, v24
+vfsqrt.v v8, v24
+vfrsqrt7.v v8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e64, m4, tu, mu
+vfadd.vv v8, v16, v24
+vfadd.vf v8, v16, f8
+vfsub.vv v8, v16, v24
+vfsub.vf v8, v16, f8
+vfrsub.vf v8, v16, f8
+vfmul.vv v8, v16, v24
+vfmul.vf v8, v16, f8
+vfdiv.vv v8, v16, v24
+vfdiv.vf v8, v16, f8
+vfrdiv.vf v8, v16, f8
+vfmacc.vv v8, v16, v24
+vfmacc.vf v8, f8, v24
+vfnmacc.vv v8, v16, v24
+vfnmacc.vf v8, f8, v24
+vfmsac.vv v8, v16, v24
+vfmsac.vf v8, f8, v24
+vfnmsac.vv v8, v16, v24
+vfnmsac.vf v8, f8, v24
+vfmadd.vv v8, v16, v24
+vfmadd.vf v8, f8, v24
+vfnmadd.vv v8, v16, v24
+vfnmadd.vf v8, f8, v24
+vfmsub.vv v8, v16, v24
+vfmsub.vf v8, f8, v24
+vfnmsub.vv v8, v16, v24
+vfnmsub.vf v8, f8, v24
+vfsqrt.v v8, v24
+vfrsqrt7.v v8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e64, m8, tu, mu
+vfadd.vv v8, v16, v24
+vfadd.vf v8, v16, f8
+vfsub.vv v8, v16, v24
+vfsub.vf v8, v16, f8
+vfrsub.vf v8, v16, f8
+vfmul.vv v8, v16, v24
+vfmul.vf v8, v16, f8
+vfdiv.vv v8, v16, v24
+vfdiv.vf v8, v16, f8
+vfrdiv.vf v8, v16, f8
+vfmacc.vv v8, v16, v24
+vfmacc.vf v8, f8, v24
+vfnmacc.vv v8, v16, v24
+vfnmacc.vf v8, f8, v24
+vfmsac.vv v8, v16, v24
+vfmsac.vf v8, f8, v24
+vfnmsac.vv v8, v16, v24
+vfnmsac.vf v8, f8, v24
+vfmadd.vv v8, v16, v24
+vfmadd.vf v8, f8, v24
+vfnmadd.vv v8, v16, v24
+vfnmadd.vf v8, f8, v24
+vfmsub.vv v8, v16, v24
+vfmsub.vf v8, f8, v24
+vfnmsub.vv v8, v16, v24
+vfnmsub.vf v8, f8, v24
+vfsqrt.v v8, v24
+vfrsqrt7.v v8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+# Vector Widening FP
+# no e64
+vsetvli zero, zero, e16, mf4, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e16, mf2, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e16, m1, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e16, m2, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e16, m4, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vsetvli zero, zero, e16, m8, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vsetvli zero, zero, e32, mf2, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vsetvli zero, zero, e32, m1, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vsetvli zero, zero, e32, m2, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vsetvli zero, zero, e32, m4, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vsetvli zero, zero, e32, m8, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e16, m8, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e32, mf2, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e32, m1, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e32, m2, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e32, m4, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+vsetvli zero, zero, e32, m8, tu, mu
+vfwadd.vv v8, v16, v24
+vfwadd.vf v8, v16, f8
+vfwsub.vv v8, v16, v24
+vfwsub.vf v8, v16, f8
+vfwadd.wv  v8, v16, v24
+vfwadd.wf  v8, v16, f8
+vfwsub.wv  v8, v16, v24
+vfwsub.wf  v8, v16, f8
+vfwmul.vv    v8, v16, v24
+vfwmul.vf    v8, v16, f8
+vfwmacc.vv v8, v16, v24
+vfwmacc.vf v8, f8, v24
+vfwnmacc.vv v8, v16, v24
+vfwnmacc.vf v8, f8, v24
+vfwmsac.vv v8, v16, v24
+vfwmsac.vf v8, f8, v24
+vfwnmsac.vv v8, v16, v24
+vfwnmsac.vf v8, f8, v24
+vfrec7.v v8, v24
+vfmin.vv v8, v16, v24
+vfmin.vf v8, v16, f8
+vfmax.vv v8, v16, v24
+vfmax.vf v8, v16, f8
+vfsgnj.vv v8, v16, v24
+vfsgnj.vf v8, v16, f8
+vfsgnjn.vv v8, v16, v24
+vfsgnjn.vf v8, v16, f8
+vfsgnjx.vv v8, v16, v24
+vfsgnjx.vf v8, v16, f8
+vfcvt.xu.f.v v8, v16
+vfcvt.x.f.v  v8, v16
+vfcvt.rtz.xu.f.v v8, v16
+vfcvt.rtz.x.f.v  v8, v16
+vfcvt.f.xu.v v8, v16
+vfcvt.f.x.v  v8, v16
+vfwcvt.xu.f.v v8, v16
+vfwcvt.x.f.v  v8, v16
+vfwcvt.rtz.xu.f.v v8, v16
+vfwcvt.rtz.x.f.v  v8, v16
+vfwcvt.f.xu.v v8, v16
+vfwcvt.f.x.v  v8, v16
+vfwcvt.f.f.v v8, v16
+vfncvt.xu.f.w v8, v16
+vfncvt.x.f.w  v8, v16
+vfncvt.rtz.xu.f.w v8, v16
+vfncvt.rtz.x.f.w  v8, v16
+vfncvt.f.xu.w v8, v16
+vfncvt.f.x.w  v8, v16
+vfncvt.f.f.w v8, v16
+vfncvt.rod.f.f.w v8, v16
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - VLEN512SiFive7FDiv:1
+# CHECK-NEXT: [1]   - VLEN512SiFive7IDiv:1
+# CHECK-NEXT: [2]   - VLEN512SiFive7PipeA:1
+# CHECK-NEXT: [3]   - VLEN512SiFive7PipeAB:2 VLEN512SiFive7PipeA, VLEN512SiFive7PipeB
+# CHECK-NEXT: [4]   - VLEN512SiFive7PipeB:1
+# CHECK-NEXT: [5]   - VLEN512SiFive7VA:1
+# CHECK-NEXT: [6]   - VLEN512SiFive7VCQ:1
+# CHECK-NEXT: [7]   - VLEN512SiFive7VL:1
+# CHECK-NEXT: [8]   - VLEN512SiFive7VS:1
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+# CHECK-NEXT: [7]: Bypass Latency
+# CHECK-NEXT: [8]: Resources (<Name> | <Name>[<ReleaseAtCycle>] | <Name>[<AcquireAtCycle>,<ReleaseAtCycle])
+# CHECK-NEXT: [9]: LLVM Opcode Name
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]                                        [9]                        Instructions:
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFADD_VV                   vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFADD_VF                   vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSUB_VV                   vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSUB_VF                   vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFRSUB_VF                  vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMUL_VV                   vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMUL_VF                   vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      30    30.00                        30    VLEN512SiFive7VA[1,31],VLEN512SiFive7VCQ   VFDIV_VV                   vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  1      30    30.00                        30    VLEN512SiFive7VA[1,31],VLEN512SiFive7VCQ   VFDIV_VF                   vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      30    30.00                        30    VLEN512SiFive7VA[1,31],VLEN512SiFive7VCQ   VFRDIV_VF                  vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMACC_VV                  vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMACC_VF                  vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMACC_VV                 vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMACC_VF                 vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMSAC_VV                  vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMSAC_VF                  vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMSAC_VV                 vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMSAC_VF                 vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMADD_VV                  vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMADD_VF                  vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMADD_VV                 vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMADD_VF                 vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMSUB_VV                  vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMSUB_VF                  vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMSUB_VV                 vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMSUB_VF                 vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      30    30.00                        30    VLEN512SiFive7VA[1,31],VLEN512SiFive7VCQ   VFSQRT_V                   vfsqrt.v	v8, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFRSQRT7_V                 vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFADD_VV                   vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFADD_VF                   vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSUB_VV                   vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSUB_VF                   vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFRSUB_VF                  vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMUL_VV                   vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMUL_VF                   vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      60    60.00                        60    VLEN512SiFive7VA[1,61],VLEN512SiFive7VCQ   VFDIV_VV                   vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  1      60    60.00                        60    VLEN512SiFive7VA[1,61],VLEN512SiFive7VCQ   VFDIV_VF                   vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      60    60.00                        60    VLEN512SiFive7VA[1,61],VLEN512SiFive7VCQ   VFRDIV_VF                  vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMACC_VV                  vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMACC_VF                  vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMACC_VV                 vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMACC_VF                 vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMSAC_VV                  vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMSAC_VF                  vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMSAC_VV                 vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMSAC_VF                 vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMADD_VV                  vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMADD_VF                  vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMADD_VV                 vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMADD_VF                 vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMSUB_VV                  vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMSUB_VF                  vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMSUB_VV                 vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMSUB_VF                 vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      60    60.00                        60    VLEN512SiFive7VA[1,61],VLEN512SiFive7VCQ   VFSQRT_V                   vfsqrt.v	v8, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFRSQRT7_V                 vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFADD_VV                   vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFADD_VF                   vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSUB_VV                   vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSUB_VF                   vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFRSUB_VF                  vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMUL_VV                   vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMUL_VF                   vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      120   120.00                       120   VLEN512SiFive7VA[1,121],VLEN512SiFive7VCQ  VFDIV_VV                   vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  1      120   120.00                       120   VLEN512SiFive7VA[1,121],VLEN512SiFive7VCQ  VFDIV_VF                   vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      120   120.00                       120   VLEN512SiFive7VA[1,121],VLEN512SiFive7VCQ  VFRDIV_VF                  vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMACC_VV                  vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMACC_VF                  vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMACC_VV                 vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMACC_VF                 vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMSAC_VV                  vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMSAC_VF                  vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMSAC_VV                 vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMSAC_VF                 vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMADD_VV                  vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMADD_VF                  vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMADD_VV                 vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMADD_VF                 vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMSUB_VV                  vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMSUB_VF                  vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMSUB_VV                 vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMSUB_VF                 vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      120   120.00                       120   VLEN512SiFive7VA[1,121],VLEN512SiFive7VCQ  VFSQRT_V                   vfsqrt.v	v8, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFRSQRT7_V                 vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFADD_VV                   vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFADD_VF                   vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSUB_VV                   vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSUB_VF                   vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFRSUB_VF                  vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMUL_VV                   vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMUL_VF                   vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      240   240.00                       240   VLEN512SiFive7VA[1,241],VLEN512SiFive7VCQ  VFDIV_VV                   vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  1      240   240.00                       240   VLEN512SiFive7VA[1,241],VLEN512SiFive7VCQ  VFDIV_VF                   vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      240   240.00                       240   VLEN512SiFive7VA[1,241],VLEN512SiFive7VCQ  VFRDIV_VF                  vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMACC_VV                  vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMACC_VF                  vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMACC_VV                 vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMACC_VF                 vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMSAC_VV                  vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMSAC_VF                  vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMSAC_VV                 vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMSAC_VF                 vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMADD_VV                  vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMADD_VF                  vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMADD_VV                 vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMADD_VF                 vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMSUB_VV                  vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMSUB_VF                  vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMSUB_VV                 vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMSUB_VF                 vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      240   240.00                       240   VLEN512SiFive7VA[1,241],VLEN512SiFive7VCQ  VFSQRT_V                   vfsqrt.v	v8, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFRSQRT7_V                 vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFADD_VV                   vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFADD_VF                   vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSUB_VV                   vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSUB_VF                   vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFRSUB_VF                  vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMUL_VV                   vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMUL_VF                   vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      480   480.00                       480   VLEN512SiFive7VA[1,481],VLEN512SiFive7VCQ  VFDIV_VV                   vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  1      480   480.00                       480   VLEN512SiFive7VA[1,481],VLEN512SiFive7VCQ  VFDIV_VF                   vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      480   480.00                       480   VLEN512SiFive7VA[1,481],VLEN512SiFive7VCQ  VFRDIV_VF                  vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMACC_VV                  vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMACC_VF                  vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMACC_VV                 vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMACC_VF                 vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMSAC_VV                  vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMSAC_VF                  vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMSAC_VV                 vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMSAC_VF                 vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMADD_VV                  vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMADD_VF                  vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMADD_VV                 vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMADD_VF                 vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMSUB_VV                  vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMSUB_VF                  vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMSUB_VV                 vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMSUB_VF                 vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      480   480.00                       480   VLEN512SiFive7VA[1,481],VLEN512SiFive7VCQ  VFSQRT_V                   vfsqrt.v	v8, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFRSQRT7_V                 vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFADD_VV                   vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFADD_VF                   vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSUB_VV                   vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSUB_VF                   vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFRSUB_VF                  vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMUL_VV                   vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMUL_VF                   vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      960   960.00                       960   VLEN512SiFive7VA[1,961],VLEN512SiFive7VCQ  VFDIV_VV                   vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  1      960   960.00                       960   VLEN512SiFive7VA[1,961],VLEN512SiFive7VCQ  VFDIV_VF                   vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      960   960.00                       960   VLEN512SiFive7VA[1,961],VLEN512SiFive7VCQ  VFRDIV_VF                  vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMACC_VV                  vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMACC_VF                  vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMACC_VV                 vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMACC_VF                 vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMSAC_VV                  vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMSAC_VF                  vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMSAC_VV                 vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMSAC_VF                 vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMADD_VV                  vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMADD_VF                  vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMADD_VV                 vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMADD_VF                 vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMSUB_VV                  vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMSUB_VF                  vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMSUB_VV                 vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMSUB_VF                 vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      960   960.00                       960   VLEN512SiFive7VA[1,961],VLEN512SiFive7VCQ  VFSQRT_V                   vfsqrt.v	v8, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFRSQRT7_V                 vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFADD_VV                   vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFADD_VF                   vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSUB_VV                   vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSUB_VF                   vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFRSUB_VF                  vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMUL_VV                   vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMUL_VF                   vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      56    56.00                        56    VLEN512SiFive7VA[1,57],VLEN512SiFive7VCQ   VFDIV_VV                   vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  1      56    56.00                        56    VLEN512SiFive7VA[1,57],VLEN512SiFive7VCQ   VFDIV_VF                   vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      56    56.00                        56    VLEN512SiFive7VA[1,57],VLEN512SiFive7VCQ   VFRDIV_VF                  vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMACC_VV                  vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMACC_VF                  vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMACC_VV                 vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMACC_VF                 vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMSAC_VV                  vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMSAC_VF                  vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMSAC_VV                 vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMSAC_VF                 vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMADD_VV                  vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMADD_VF                  vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMADD_VV                 vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMADD_VF                 vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMSUB_VV                  vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMSUB_VF                  vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMSUB_VV                 vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNMSUB_VF                 vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      56    56.00                        56    VLEN512SiFive7VA[1,57],VLEN512SiFive7VCQ   VFSQRT_V                   vfsqrt.v	v8, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFRSQRT7_V                 vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFADD_VV                   vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFADD_VF                   vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSUB_VV                   vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSUB_VF                   vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFRSUB_VF                  vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMUL_VV                   vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMUL_VF                   vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      112   112.00                       112   VLEN512SiFive7VA[1,113],VLEN512SiFive7VCQ  VFDIV_VV                   vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  1      112   112.00                       112   VLEN512SiFive7VA[1,113],VLEN512SiFive7VCQ  VFDIV_VF                   vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      112   112.00                       112   VLEN512SiFive7VA[1,113],VLEN512SiFive7VCQ  VFRDIV_VF                  vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMACC_VV                  vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMACC_VF                  vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMACC_VV                 vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMACC_VF                 vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMSAC_VV                  vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMSAC_VF                  vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMSAC_VV                 vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMSAC_VF                 vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMADD_VV                  vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMADD_VF                  vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMADD_VV                 vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMADD_VF                 vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMSUB_VV                  vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMSUB_VF                  vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMSUB_VV                 vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMSUB_VF                 vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      112   112.00                       112   VLEN512SiFive7VA[1,113],VLEN512SiFive7VCQ  VFSQRT_V                   vfsqrt.v	v8, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFRSQRT7_V                 vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFADD_VV                   vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFADD_VF                   vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSUB_VV                   vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSUB_VF                   vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFRSUB_VF                  vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMUL_VV                   vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMUL_VF                   vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      224   224.00                       224   VLEN512SiFive7VA[1,225],VLEN512SiFive7VCQ  VFDIV_VV                   vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  1      224   224.00                       224   VLEN512SiFive7VA[1,225],VLEN512SiFive7VCQ  VFDIV_VF                   vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      224   224.00                       224   VLEN512SiFive7VA[1,225],VLEN512SiFive7VCQ  VFRDIV_VF                  vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMACC_VV                  vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMACC_VF                  vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMACC_VV                 vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMACC_VF                 vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMSAC_VV                  vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMSAC_VF                  vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMSAC_VV                 vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMSAC_VF                 vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMADD_VV                  vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMADD_VF                  vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMADD_VV                 vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMADD_VF                 vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMSUB_VV                  vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMSUB_VF                  vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMSUB_VV                 vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMSUB_VF                 vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      224   224.00                       224   VLEN512SiFive7VA[1,225],VLEN512SiFive7VCQ  VFSQRT_V                   vfsqrt.v	v8, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFRSQRT7_V                 vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFADD_VV                   vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFADD_VF                   vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSUB_VV                   vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSUB_VF                   vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFRSUB_VF                  vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMUL_VV                   vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMUL_VF                   vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      448   448.00                       448   VLEN512SiFive7VA[1,449],VLEN512SiFive7VCQ  VFDIV_VV                   vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  1      448   448.00                       448   VLEN512SiFive7VA[1,449],VLEN512SiFive7VCQ  VFDIV_VF                   vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      448   448.00                       448   VLEN512SiFive7VA[1,449],VLEN512SiFive7VCQ  VFRDIV_VF                  vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMACC_VV                  vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMACC_VF                  vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMACC_VV                 vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMACC_VF                 vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMSAC_VV                  vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMSAC_VF                  vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMSAC_VV                 vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMSAC_VF                 vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMADD_VV                  vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMADD_VF                  vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMADD_VV                 vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMADD_VF                 vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMSUB_VV                  vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMSUB_VF                  vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMSUB_VV                 vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMSUB_VF                 vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      448   448.00                       448   VLEN512SiFive7VA[1,449],VLEN512SiFive7VCQ  VFSQRT_V                   vfsqrt.v	v8, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFRSQRT7_V                 vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFADD_VV                   vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFADD_VF                   vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSUB_VV                   vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSUB_VF                   vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFRSUB_VF                  vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMUL_VV                   vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMUL_VF                   vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      896   896.00                       896   VLEN512SiFive7VA[1,897],VLEN512SiFive7VCQ  VFDIV_VV                   vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  1      896   896.00                       896   VLEN512SiFive7VA[1,897],VLEN512SiFive7VCQ  VFDIV_VF                   vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      896   896.00                       896   VLEN512SiFive7VA[1,897],VLEN512SiFive7VCQ  VFRDIV_VF                  vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMACC_VV                  vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMACC_VF                  vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMACC_VV                 vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMACC_VF                 vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMSAC_VV                  vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMSAC_VF                  vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMSAC_VV                 vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMSAC_VF                 vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMADD_VV                  vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMADD_VF                  vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMADD_VV                 vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMADD_VF                 vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMSUB_VV                  vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMSUB_VF                  vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMSUB_VV                 vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMSUB_VF                 vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      896   896.00                       896   VLEN512SiFive7VA[1,897],VLEN512SiFive7VCQ  VFSQRT_V                   vfsqrt.v	v8, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFRSQRT7_V                 vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFADD_VV                   vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFADD_VF                   vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSUB_VV                   vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSUB_VF                   vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFRSUB_VF                  vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMUL_VV                   vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMUL_VF                   vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      114   114.00                       114   VLEN512SiFive7VA[1,115],VLEN512SiFive7VCQ  VFDIV_VV                   vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  1      114   114.00                       114   VLEN512SiFive7VA[1,115],VLEN512SiFive7VCQ  VFDIV_VF                   vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      114   114.00                       114   VLEN512SiFive7VA[1,115],VLEN512SiFive7VCQ  VFRDIV_VF                  vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMACC_VV                  vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMACC_VF                  vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMACC_VV                 vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMACC_VF                 vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMSAC_VV                  vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMSAC_VF                  vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMSAC_VV                 vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMSAC_VF                 vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMADD_VV                  vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMADD_VF                  vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMADD_VV                 vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMADD_VF                 vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMSUB_VV                  vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMSUB_VF                  vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMSUB_VV                 vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNMSUB_VF                 vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      114   114.00                       114   VLEN512SiFive7VA[1,115],VLEN512SiFive7VCQ  VFSQRT_V                   vfsqrt.v	v8, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFRSQRT7_V                 vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFADD_VV                   vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFADD_VF                   vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSUB_VV                   vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSUB_VF                   vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFRSUB_VF                  vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMUL_VV                   vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMUL_VF                   vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      228   228.00                       228   VLEN512SiFive7VA[1,229],VLEN512SiFive7VCQ  VFDIV_VV                   vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  1      228   228.00                       228   VLEN512SiFive7VA[1,229],VLEN512SiFive7VCQ  VFDIV_VF                   vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      228   228.00                       228   VLEN512SiFive7VA[1,229],VLEN512SiFive7VCQ  VFRDIV_VF                  vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMACC_VV                  vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMACC_VF                  vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMACC_VV                 vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMACC_VF                 vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMSAC_VV                  vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMSAC_VF                  vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMSAC_VV                 vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMSAC_VF                 vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMADD_VV                  vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMADD_VF                  vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMADD_VV                 vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMADD_VF                 vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMSUB_VV                  vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMSUB_VF                  vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMSUB_VV                 vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNMSUB_VF                 vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      228   228.00                       228   VLEN512SiFive7VA[1,229],VLEN512SiFive7VCQ  VFSQRT_V                   vfsqrt.v	v8, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFRSQRT7_V                 vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFADD_VV                   vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFADD_VF                   vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSUB_VV                   vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSUB_VF                   vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFRSUB_VF                  vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMUL_VV                   vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMUL_VF                   vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      456   456.00                       456   VLEN512SiFive7VA[1,457],VLEN512SiFive7VCQ  VFDIV_VV                   vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  1      456   456.00                       456   VLEN512SiFive7VA[1,457],VLEN512SiFive7VCQ  VFDIV_VF                   vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      456   456.00                       456   VLEN512SiFive7VA[1,457],VLEN512SiFive7VCQ  VFRDIV_VF                  vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMACC_VV                  vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMACC_VF                  vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMACC_VV                 vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMACC_VF                 vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMSAC_VV                  vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMSAC_VF                  vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMSAC_VV                 vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMSAC_VF                 vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMADD_VV                  vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMADD_VF                  vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMADD_VV                 vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMADD_VF                 vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMSUB_VV                  vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMSUB_VF                  vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMSUB_VV                 vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNMSUB_VF                 vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      456   456.00                       456   VLEN512SiFive7VA[1,457],VLEN512SiFive7VCQ  VFSQRT_V                   vfsqrt.v	v8, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFRSQRT7_V                 vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFADD_VV                   vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFADD_VF                   vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSUB_VV                   vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSUB_VF                   vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFRSUB_VF                  vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMUL_VV                   vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMUL_VF                   vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      912   912.00                       912   VLEN512SiFive7VA[1,913],VLEN512SiFive7VCQ  VFDIV_VV                   vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  1      912   912.00                       912   VLEN512SiFive7VA[1,913],VLEN512SiFive7VCQ  VFDIV_VF                   vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      912   912.00                       912   VLEN512SiFive7VA[1,913],VLEN512SiFive7VCQ  VFRDIV_VF                  vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMACC_VV                  vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMACC_VF                  vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMACC_VV                 vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMACC_VF                 vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMSAC_VV                  vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMSAC_VF                  vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMSAC_VV                 vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMSAC_VF                 vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMADD_VV                  vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMADD_VF                  vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMADD_VV                 vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMADD_VF                 vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMSUB_VV                  vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMSUB_VF                  vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMSUB_VV                 vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNMSUB_VF                 vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  1      912   912.00                       912   VLEN512SiFive7VA[1,913],VLEN512SiFive7VCQ  VFSQRT_V                   vfsqrt.v	v8, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFRSQRT7_V                 vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     1.00                         4     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     1.00                         8     VLEN512SiFive7VA[1,2],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     2.00                         4     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     2.00                         8     VLEN512SiFive7VA[1,3],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     4.00                         4     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     4.00                         8     VLEN512SiFive7VA[1,5],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     8.00                         4     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  1      3     1.00                  U      1     VLEN512SiFive7PipeA,VLEN512SiFive7PipeAB   VSETVLI                    vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_VV                  vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_VF                  vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_VV                  vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_VF                  vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_WV                  vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWADD_WF                  vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_WV                  vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWSUB_WF                  vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMUL_VV                  vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMUL_VF                  vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMACC_VV                 vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMACC_VF                 vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMACC_VV                vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMACC_VF                vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMSAC_VV                 vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWMSAC_VF                 vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMSAC_VV                vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWNMSAC_VF                vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFREC7_V                   vfrec7.v	v8, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMIN_VV                   vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMIN_VF                   vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMAX_VV                   vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFMAX_VF                   vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJ_VV                  vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJ_VF                  vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJN_VV                 vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJN_VF                 vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJX_VV                 vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  1      4     16.00                        4     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFSGNJX_VF                 vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_XU_F_V               vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_X_F_V                vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_RTZ_XU_F_V           vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_RTZ_X_F_V            vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_F_XU_V               vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFCVT_F_X_V                vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_XU_F_V              vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_X_F_V               vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_XU_F_V          vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_RTZ_X_F_V           vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_XU_V              vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_X_V               vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  1      8     8.00                         8     VLEN512SiFive7VA[1,9],VLEN512SiFive7VCQ    VFWCVT_F_F_V               vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_XU_F_W              vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_X_F_W               vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_XU_F_W          vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_RTZ_X_F_W           vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_XU_W              vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_X_W               vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_F_F_W               vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  1      8     16.00                        8     VLEN512SiFive7VA[1,17],VLEN512SiFive7VCQ   VFNCVT_ROD_F_F_W           vfncvt.rod.f.f.w	v8, v16
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - VLEN512SiFive7FDiv
+# CHECK-NEXT: [1]   - VLEN512SiFive7IDiv
+# CHECK-NEXT: [2]   - VLEN512SiFive7PipeA
+# CHECK-NEXT: [3]   - VLEN512SiFive7PipeB
+# CHECK-NEXT: [4]   - VLEN512SiFive7VA
+# CHECK-NEXT: [5]   - VLEN512SiFive7VCQ
+# CHECK-NEXT: [6]   - VLEN512SiFive7VL
+# CHECK-NEXT: [7]   - VLEN512SiFive7VS
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
+# CHECK-NEXT:  -      -     32.00   -     32088.00 1558.00  -   -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     31.00  1.00    -      -     vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     31.00  1.00    -      -     vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     31.00  1.00    -      -     vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     31.00  1.00    -      -     vfsqrt.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     61.00  1.00    -      -     vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     61.00  1.00    -      -     vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     61.00  1.00    -      -     vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     61.00  1.00    -      -     vfsqrt.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     121.00 1.00    -      -     vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     121.00 1.00    -      -     vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     121.00 1.00    -      -     vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     121.00 1.00    -      -     vfsqrt.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     241.00 1.00    -      -     vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     241.00 1.00    -      -     vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     241.00 1.00    -      -     vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     241.00 1.00    -      -     vfsqrt.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     481.00 1.00    -      -     vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     481.00 1.00    -      -     vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     481.00 1.00    -      -     vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     481.00 1.00    -      -     vfsqrt.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     961.00 1.00    -      -     vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     961.00 1.00    -      -     vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     961.00 1.00    -      -     vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     961.00 1.00    -      -     vfsqrt.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     57.00  1.00    -      -     vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     57.00  1.00    -      -     vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     57.00  1.00    -      -     vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     57.00  1.00    -      -     vfsqrt.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     113.00 1.00    -      -     vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     113.00 1.00    -      -     vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     113.00 1.00    -      -     vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     113.00 1.00    -      -     vfsqrt.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     225.00 1.00    -      -     vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     225.00 1.00    -      -     vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     225.00 1.00    -      -     vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     225.00 1.00    -      -     vfsqrt.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     449.00 1.00    -      -     vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     449.00 1.00    -      -     vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     449.00 1.00    -      -     vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     449.00 1.00    -      -     vfsqrt.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     897.00 1.00    -      -     vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     897.00 1.00    -      -     vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     897.00 1.00    -      -     vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     897.00 1.00    -      -     vfsqrt.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     115.00 1.00    -      -     vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     115.00 1.00    -      -     vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     115.00 1.00    -      -     vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     115.00 1.00    -      -     vfsqrt.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     229.00 1.00    -      -     vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     229.00 1.00    -      -     vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     229.00 1.00    -      -     vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     229.00 1.00    -      -     vfsqrt.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     457.00 1.00    -      -     vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     457.00 1.00    -      -     vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     457.00 1.00    -      -     vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     457.00 1.00    -      -     vfsqrt.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e64, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfrsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     913.00 1.00    -      -     vfdiv.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     913.00 1.00    -      -     vfdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     913.00 1.00    -      -     vfrdiv.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmadd.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfnmsub.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     913.00 1.00    -      -     vfsqrt.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfrsqrt7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf4, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e16, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, mf2, tu, mu
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     2.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m1, tu, mu
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     3.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m2, tu, mu
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     5.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m4, tu, mu
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rod.f.f.w	v8, v16
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vsetvli	zero, zero, e32, m8, tu, mu
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwadd.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.wv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwsub.wf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmul.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmul.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmacc.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmacc.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmsac.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwnmsac.vf	v8, fs0, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfrec7.v	v8, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmin.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmin.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmax.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfmax.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnj.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnj.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjn.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjn.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjx.vv	v8, v16, v24
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfsgnjx.vf	v8, v16, fs0
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.xu.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.rtz.x.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.xu.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.x.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     9.00   1.00    -      -     vfwcvt.f.f.v	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.xu.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rtz.x.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.xu.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.x.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.f.f.w	v8, v16
+# CHECK-NEXT:  -      -      -      -     17.00  1.00    -      -     vfncvt.rod.f.f.w	v8, v16
diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s
index 8838c86..ecd96a3 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/atomic.s
@@ -126,19 +126,19 @@ amomaxu.d.aqrl s5, s4, (s3)
 # CHECK-NEXT:  1      8     0.50    *                    8     SMX60_LS                                   LR_W                       lr.w	t0, (t1)
 # CHECK-NEXT:  1      8     0.50    *                    8     SMX60_LS                                   LR_W_AQ                    lr.w.aq	t1, (t2)
 # CHECK-NEXT:  1      8     0.50    *                    8     SMX60_LS                                   LR_W_RL                    lr.w.rl	t2, (t3)
-# CHECK-NEXT:  1      8     0.50    *                    8     SMX60_LS                                   LR_W_AQ_RL                 lr.w.aqrl	t3, (t4)
+# CHECK-NEXT:  1      8     0.50    *                    8     SMX60_LS                                   LR_W_AQRL                  lr.w.aqrl	t3, (t4)
 # CHECK-NEXT:  1      8     0.50           *             8     SMX60_LS                                   SC_W                       sc.w	t6, t5, (t4)
 # CHECK-NEXT:  1      8     0.50           *             8     SMX60_LS                                   SC_W_AQ                    sc.w.aq	t5, t4, (t3)
 # CHECK-NEXT:  1      8     0.50           *             8     SMX60_LS                                   SC_W_RL                    sc.w.rl	t4, t3, (t2)
-# CHECK-NEXT:  1      8     0.50           *             8     SMX60_LS                                   SC_W_AQ_RL                 sc.w.aqrl	t3, t2, (t1)
+# CHECK-NEXT:  1      8     0.50           *             8     SMX60_LS                                   SC_W_AQRL                  sc.w.aqrl	t3, t2, (t1)
 # CHECK-NEXT:  1      8     0.50    *                    8     SMX60_LS                                   LR_D                       lr.d	t0, (t1)
 # CHECK-NEXT:  1      8     0.50    *                    8     SMX60_LS                                   LR_D_AQ                    lr.d.aq	t1, (t2)
 # CHECK-NEXT:  1      8     0.50    *                    8     SMX60_LS                                   LR_D_RL                    lr.d.rl	t2, (t3)
-# CHECK-NEXT:  1      8     0.50    *                    8     SMX60_LS                                   LR_D_AQ_RL                 lr.d.aqrl	t3, (t4)
+# CHECK-NEXT:  1      8     0.50    *                    8     SMX60_LS                                   LR_D_AQRL                  lr.d.aqrl	t3, (t4)
 # CHECK-NEXT:  1      8     0.50           *             8     SMX60_LS                                   SC_D                       sc.d	t6, t5, (t4)
 # CHECK-NEXT:  1      8     0.50           *             8     SMX60_LS                                   SC_D_AQ                    sc.d.aq	t5, t4, (t3)
 # CHECK-NEXT:  1      8     0.50           *             8     SMX60_LS                                   SC_D_RL                    sc.d.rl	t4, t3, (t2)
-# CHECK-NEXT:  1      8     0.50           *             8     SMX60_LS                                   SC_D_AQ_RL                 sc.d.aqrl	t3, t2, (t1)
+# CHECK-NEXT:  1      8     0.50           *             8     SMX60_LS                                   SC_D_AQRL                  sc.d.aqrl	t3, t2, (t1)
 # CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOSWAP_W                  amoswap.w	a4, ra, (s0)
 # CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOADD_W                   amoadd.w	a1, a2, (a3)
 # CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOXOR_W                   amoxor.w	a2, a3, (a4)
@@ -166,15 +166,15 @@ amomaxu.d.aqrl s5, s4, (s3)
 # CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMAX_W_RL                amomax.w.rl	s7, s6, (s5)
 # CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMINU_W_RL               amominu.w.rl	s6, s5, (s4)
 # CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMAXU_W_RL               amomaxu.w.rl	s5, s4, (s3)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOSWAP_W_AQ_RL            amoswap.w.aqrl	a4, ra, (s0)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOADD_W_AQ_RL             amoadd.w.aqrl	a1, a2, (a3)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOXOR_W_AQ_RL             amoxor.w.aqrl	a2, a3, (a4)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOAND_W_AQ_RL             amoand.w.aqrl	a3, a4, (a5)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOOR_W_AQ_RL              amoor.w.aqrl	a4, a5, (a6)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMIN_W_AQ_RL             amomin.w.aqrl	a5, a6, (a7)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMAX_W_AQ_RL             amomax.w.aqrl	s7, s6, (s5)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMINU_W_AQ_RL            amominu.w.aqrl	s6, s5, (s4)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMAXU_W_AQ_RL            amomaxu.w.aqrl	s5, s4, (s3)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOSWAP_W_AQRL             amoswap.w.aqrl	a4, ra, (s0)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOADD_W_AQRL              amoadd.w.aqrl	a1, a2, (a3)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOXOR_W_AQRL              amoxor.w.aqrl	a2, a3, (a4)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOAND_W_AQRL              amoand.w.aqrl	a3, a4, (a5)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOOR_W_AQRL               amoor.w.aqrl	a4, a5, (a6)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMIN_W_AQRL              amomin.w.aqrl	a5, a6, (a7)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMAX_W_AQRL              amomax.w.aqrl	s7, s6, (s5)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMINU_W_AQRL             amominu.w.aqrl	s6, s5, (s4)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMAXU_W_AQRL             amomaxu.w.aqrl	s5, s4, (s3)
 # CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOSWAP_D                  amoswap.d	a4, ra, (s0)
 # CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOADD_D                   amoadd.d	a1, a2, (a3)
 # CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOXOR_D                   amoxor.d	a2, a3, (a4)
@@ -202,15 +202,15 @@ amomaxu.d.aqrl s5, s4, (s3)
 # CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMAX_D_RL                amomax.d.rl	s7, s6, (s5)
 # CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMINU_D_RL               amominu.d.rl	s6, s5, (s4)
 # CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMAXU_D_RL               amomaxu.d.rl	s5, s4, (s3)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOSWAP_D_AQ_RL            amoswap.d.aqrl	a4, ra, (s0)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOADD_D_AQ_RL             amoadd.d.aqrl	a1, a2, (a3)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOXOR_D_AQ_RL             amoxor.d.aqrl	a2, a3, (a4)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOAND_D_AQ_RL             amoand.d.aqrl	a3, a4, (a5)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOOR_D_AQ_RL              amoor.d.aqrl	a4, a5, (a6)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMIN_D_AQ_RL             amomin.d.aqrl	a5, a6, (a7)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMAX_D_AQ_RL             amomax.d.aqrl	s7, s6, (s5)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMINU_D_AQ_RL            amominu.d.aqrl	s6, s5, (s4)
-# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMAXU_D_AQ_RL            amomaxu.d.aqrl	s5, s4, (s3)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOSWAP_D_AQRL             amoswap.d.aqrl	a4, ra, (s0)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOADD_D_AQRL              amoadd.d.aqrl	a1, a2, (a3)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOXOR_D_AQRL              amoxor.d.aqrl	a2, a3, (a4)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOAND_D_AQRL              amoand.d.aqrl	a3, a4, (a5)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOOR_D_AQRL               amoor.d.aqrl	a4, a5, (a6)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMIN_D_AQRL              amomin.d.aqrl	a5, a6, (a7)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMAX_D_AQRL              amomax.d.aqrl	s7, s6, (s5)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMINU_D_AQRL             amominu.d.aqrl	s6, s5, (s4)
+# CHECK-NEXT:  1      12    0.50    *      *             12    SMX60_LS                                   AMOMAXU_D_AQRL             amomaxu.d.aqrl	s5, s4, (s3)
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - SMX60_FP
diff --git a/llvm/test/tools/llvm-offload-binary/llvm-offload-binary.ll b/llvm/test/tools/llvm-offload-binary/llvm-offload-binary.ll
new file mode 100644
index 0000000..c6027b3
--- /dev/null
+++ b/llvm/test/tools/llvm-offload-binary/llvm-offload-binary.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-offload-binary -o %t --image=file=%s,arch=abc,triple=x-y-z
+; RUN: llvm-objdump --offloading %t | FileCheck %s
+; RUN: llvm-offload-binary %t --image=file=%t2,arch=abc,triple=x-y-z
+; RUN: diff %s %t2
+
+;      CHECK: OFFLOADING IMAGE [0]:
+; CHECK-NEXT: kind            <none>
+; CHECK-NEXT: arch            abc
+; CHECK-NEXT: triple          x-y-z
+; CHECK-NEXT: producer        none
+
+; RUN: llvm-offload-binary -o %t3 --image=file=%s
+; RUN: llvm-offload-binary %t3 --image=file=%t4
+; RUN: diff %s %t4
diff --git a/llvm/test/tools/llvm-reduce/inline-call-sites-cost.ll b/llvm/test/tools/llvm-reduce/inline-call-sites-cost.ll
new file mode 100644
index 0000000..fc25ca4
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/inline-call-sites-cost.ll
@@ -0,0 +1,95 @@
+; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=inline-call-sites -reduce-callsite-inline-threshold=3 --test FileCheck --test-arg --check-prefix=CHECK --test-arg %s --test-arg --input-file %s -o %t
+; RUN: FileCheck -check-prefixes=RESULT,CHECK %s < %t
+
+declare void @extern_b()
+declare void @extern_a()
+
+; RESULT: @gv_init = global ptr @no_inline_noncall_user
+@gv_init = global ptr @no_inline_noncall_user
+
+
+; CHECK-LABEL: define void @no_inline_noncall_user(
+define void @no_inline_noncall_user() { ; address is also the initializer of @gv_init
+  call void @extern_a()
+  call void @extern_a()
+  call void @extern_a()
+  call void @extern_a()
+  ret void
+}
+
+; RESULT-LABEL: define void @noncall_user_call() {
+; RESULT-NEXT: call void @no_inline_noncall_user()
+; RESULT-NEXT: ret void
+define void @noncall_user_call() {
+  call void @no_inline_noncall_user() ; expected to remain a call in the reduced output
+  ret void
+}
+
+; RESULT-LABEL: define void @big_callee_small_caller_callee() {
+define void @big_callee_small_caller_callee() { ; the "big" callee: four calls in its body
+  call void @extern_a()
+  call void @extern_a()
+  call void @extern_a()
+  call void @extern_a()
+  ret void
+}
+
+; RESULT-LABEL: define void @big_callee_small_caller_caller() {
+; RESULT-NEXT: call void @extern_b()
+; RESULT-NEXT: call void @extern_a()
+; RESULT-NEXT: call void @extern_a()
+; RESULT-NEXT: call void @extern_a()
+; RESULT-NEXT: call void @extern_a()
+; RESULT-NEXT: ret void
+define void @big_callee_small_caller_caller() {
+  call void @extern_b()
+  call void @big_callee_small_caller_callee() ; expected to be replaced by the callee's four @extern_a calls
+  ret void
+}
+
+; RESULT-LABEL: define void @small_callee_big_caller_callee() {
+; RESULT-NEXT: call void @extern_a()
+; RESULT-NEXT: ret void
+define void @small_callee_big_caller_callee() { ; the "small" callee: a single call in its body
+  call void @extern_a()
+  ret void
+}
+
+; RESULT-LABEL: define void @small_callee_big_caller_caller() {
+; RESULT-NEXT: call void @extern_b()
+; RESULT-NEXT: call void @extern_a()
+; RESULT-NEXT: call void @extern_b()
+; RESULT-NEXT: call void @extern_b()
+; RESULT-NEXT: ret void
+define void @small_callee_big_caller_caller() {
+  call void @extern_b()
+  call void @small_callee_big_caller_callee() ; expected to be replaced by the callee's single @extern_a call
+  call void @extern_b()
+  call void @extern_b()
+  ret void
+}
+
+; RESULT-LABEL: define void @big_callee_big_caller_callee() {
+define void @big_callee_big_caller_callee() { ; the "big" callee: four calls in its body
+  call void @extern_a()
+  call void @extern_a()
+  call void @extern_a()
+  call void @extern_a()
+  ret void
+}
+
+; RESULT-LABEL: define void @big_callee_big_caller_caller() {
+; RESULT-NEXT: call void @extern_b()
+; RESULT-NEXT: call void @big_callee_big_caller_callee()
+; RESULT-NEXT: call void @extern_b()
+; RESULT-NEXT: call void @extern_b()
+; RESULT-NEXT: call void @extern_b()
+; RESULT-NEXT: ret void
+define void @big_callee_big_caller_caller() {
+  call void @extern_b()
+  call void @big_callee_big_caller_callee() ; expected to remain a call in the reduced output
+  call void @extern_b()
+  call void @extern_b()
+  call void @extern_b()
+  ret void
+}
diff --git a/llvm/test/tools/llvm-reduce/inline-call-sites.ll b/llvm/test/tools/llvm-reduce/inline-call-sites.ll
new file mode 100644
index 0000000..34775d9
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/inline-call-sites.ll
@@ -0,0 +1,765 @@
+; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=inline-call-sites -reduce-callsite-inline-threshold=-1 --test FileCheck --test-arg --check-prefixes=CHECK,INTERESTING --test-arg %s --test-arg --input-file %s -o %t
+; RUN: FileCheck -check-prefixes=RESULT,CHECK %s < %t
+
+; RESULT: @gv = global [2 x ptr] [ptr @only_gv_user, ptr @simple_callee]
+@gv = global [2 x ptr] [ptr @only_gv_user, ptr @simple_callee]
+
+; RESULT: @indirectbr.L = internal unnamed_addr constant [3 x ptr] [ptr blockaddress(@callee_with_indirectbr, %L1), ptr blockaddress(@callee_with_indirectbr, %L2), ptr null], align 8
+@indirectbr.L = internal unnamed_addr constant [3 x ptr] [ptr blockaddress(@callee_with_indirectbr, %L1), ptr blockaddress(@callee_with_indirectbr, %L2), ptr null], align 8
+
+
+; CHECK-LABEL: define void @simple_callee(
+; RESULT-NEXT: store i32 123, ptr %arg, align 4
+; RESULT-NEXT: ret void
+define void @simple_callee(ptr %arg) { ; also referenced from the @gv initializer
+  store i32 123, ptr %arg
+  ret void
+}
+
+; CHECK-LABEL: define void @simple_caller(
+; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4
+; RESULT-NEXT: ret void
+define void @simple_caller(ptr %outer.arg) {
+  call void @simple_callee(ptr %outer.arg) ; expected to become a store of 123 through %outer.arg
+  ret void
+}
+
+; CHECK-LABEL: define void @multi_simple_caller(
+; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4
+; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4
+; RESULT-NEXT: store i32 123, ptr null, align 4
+; RESULT-NEXT: ret void
+define void @multi_simple_caller(ptr %outer.arg) { ; all three call sites are expected to be inlined
+  call void @simple_callee(ptr %outer.arg)
+  call void @simple_callee(ptr %outer.arg)
+  call void @simple_callee(ptr null) ; the null argument is expected to become the store address
+  ret void
+}
+
+; CHECK-LABEL: define void @only_gv_user(
+; RESULT-NEXT: store i32 666, ptr %arg, align 4
+; RESULT-NEXT: ret void
+define void @only_gv_user(ptr %arg) { ; referenced from @gv; no call site in the visible part of this test
+  store i32 666, ptr %arg
+  ret void
+}
+
+; CHECK-LABEL: define void @recursive(
+; RESULT-NEXT: call void @recursive(ptr %arg)
+; RESULT-NEXT: ret void
+define void @recursive(ptr %arg) {
+  call void @recursive(ptr %arg) ; self-call, expected to be left in place
+  ret void
+}
+
+; CHECK-LABEL: define void @recursive_with_wrong_callsite_type(
+; RESULT-NEXT: call void @recursive_with_wrong_callsite_type(ptr %arg, i32 2)
+; RESULT-NEXT: ret void
+define void @recursive_with_wrong_callsite_type(ptr %arg) {
+  call void @recursive_with_wrong_callsite_type(ptr %arg, i32 2) ; self-call with an extra i32 the signature lacks; expected to be left in place
+  ret void
+}
+
+; CHECK-LABEL: define void @non_callee_use(
+; RESULT-NEXT: store i32 567, ptr %arg, align 4
+; RESULT-NEXT: ret void
+define void @non_callee_use(ptr %arg) { ; its address is passed to @extern_ptr_use by @non_callee_user
+  store i32 567, ptr %arg
+  ret void
+}
+
+declare void @extern_ptr_use(ptr)
+
+; CHECK-LABEL: define void @non_callee_user(
+; RESULT-NEXT: call void @extern_ptr_use(ptr @non_callee_use)
+; RESULT-NEXT: ret void
+define void @non_callee_user() {
+  call void @extern_ptr_use(ptr @non_callee_use) ; @non_callee_use is an argument here, not the callee
+  ret void
+}
+
+; CHECK-LABEL: define void @non_call_inst_use(
+define void @non_call_inst_use(ptr %arg) { ; its address is stored by @non_call_inst_user
+  store i32 999, ptr %arg
+  ret void
+}
+
+; CHECK-LABEL: define void @non_call_inst_user(
+; RESULT-NEXT: store ptr @non_call_inst_use, ptr %arg, align 8
+; RESULT-NEXT: ret void
+define void @non_call_inst_user(ptr %arg) {
+  store ptr @non_call_inst_use, ptr %arg ; the function is used by a store, not by a call
+  ret void
+}
+
+; CHECK-LABEL: define i32 @used_wrong_call_type(
+; RESULT-NEXT: store i32 123, ptr %arg, align 4
+; RESULT-NEXT: ret i32 8
+define i32 @used_wrong_call_type(ptr %arg) { ; returns i32, but @use_wrong_call_type calls it as void
+  store i32 123, ptr %arg
+  ret i32 8
+}
+
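+; Inlining does not support UB call sites such as this one, where the call's return type (void) differs from the callee's (i32), so the call is kept.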
+; CHECK-LABEL: define void @use_wrong_call_type(
+; RESULT-NEXT: call void @used_wrong_call_type(ptr %outer.arg)
+; RESULT-NEXT: ret void
+define void @use_wrong_call_type(ptr %outer.arg) {
+  call void @used_wrong_call_type(ptr %outer.arg) ; call site is typed void while the callee returns i32; expected to be left in place
+  ret void
+}
+
+; INTERESTING-LABEL: define void @incompatible_gc_callee(
+
+; RESULT-LABEL: define void @incompatible_gc_callee(ptr %arg) gc "gc0" {
+; RESULT-NEXT: store i32 10000, ptr %arg, align 4
+; RESULT-NEXT: ret void
+define void @incompatible_gc_callee(ptr %arg) gc "gc0" { ; its only visible caller uses gc "gc1"
+  store i32 10000, ptr %arg
+  ret void
+}
+
+; INTERESTING-LABEL: define void @incompatible_gc_caller(
+
+; RESULT-LABEL: define void @incompatible_gc_caller(ptr %outer.arg) gc "gc1" {
+; RESULT-NEXT: call void @incompatible_gc_callee(ptr %outer.arg)
+; RESULT-NEXT: ret void
+define void @incompatible_gc_caller(ptr %outer.arg) gc "gc1" {
+  call void @incompatible_gc_callee(ptr %outer.arg) ; caller and callee name different gc strategies; expected to be left in place
+  ret void
+}
+
+; INTERESTING-LABEL: define void @propagate_callee_gc(
+
+; RESULT-LABEL: define void @propagate_callee_gc(ptr %arg) gc "propagate-gc" {
+; RESULT-NEXT: store i32 10000, ptr %arg, align 4
+; RESULT-NEXT: ret void
+define void @propagate_callee_gc(ptr %arg) gc "propagate-gc" { ; called from @propagate_caller_gc, which has no gc attribute
+  store i32 10000, ptr %arg
+  ret void
+}
+
+; INTERESTING-LABEL: define void @propagate_caller_gc(
+
+; RESULT-LABEL: define void @propagate_caller_gc(ptr %arg) gc "propagate-gc" {
+; RESULT-NEXT: store i32 10000, ptr %arg, align 4
+; RESULT-NEXT: ret void
+define void @propagate_caller_gc(ptr %arg)  { ; expected to carry gc "propagate-gc" in the reduced output
+  call void @propagate_callee_gc(ptr %arg) ; expected to become the callee's store of 10000
+  ret void
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+; INTERESTING-LABEL: define void @propagate_callee_personality(
+
+; RESULT-LABEL: define void @propagate_callee_personality(ptr %arg) personality ptr @__gxx_personality_v0 {
+; RESULT-NEXT: store i32 2000, ptr %arg, align 4
+; RESULT-NEXT: ret void
+define void @propagate_callee_personality(ptr %arg) personality ptr @__gxx_personality_v0 { ; called from @propagate_caller_personality, which has no personality
+  store i32 2000, ptr %arg
+  ret void
+}
+
+; INTERESTING-LABEL: define void @propagate_caller_personality(
+
+; RESULT-LABEL: define void @propagate_caller_personality(ptr %arg) personality ptr @__gxx_personality_v0 {
+; RESULT-NEXT: store i32 2000, ptr %arg, align 4
+; RESULT-NEXT: ret void
+define void @propagate_caller_personality(ptr %arg)  { ; expected to carry the callee's personality in the reduced output
+  call void @propagate_callee_personality(ptr %arg) ; expected to become the callee's store of 2000
+  ret void
+}
+
+; CHECK-LABEL: define void @callee_with_indirectbr(
+define void @callee_with_indirectbr() { ; @indirectbr.L holds blockaddress constants of this function's L1 and L2
+entry:
+  br label %L1
+
+L1:                                               ; preds = %entry, %L1
+  %i = phi i32 [ 0, %entry ], [ %inc, %L1 ]
+  %inc = add i32 %i, 1
+  %idxprom = zext i32 %i to i64
+  %arrayidx = getelementptr inbounds [3 x ptr], ptr @indirectbr.L, i64 0, i64 %idxprom
+  %brtarget = load ptr, ptr %arrayidx, align 8 ; branch target is entry %i of the @indirectbr.L table
+  indirectbr ptr %brtarget, [label %L1, label %L2]
+
+L2:                                               ; preds = %L1
+  ret void
+}
+
+; CHECK-LABEL: define void @calls_func_with_indirectbr(
+
+; RESULT: L1.i:
+; RESULT-NEXT: %i.i = phi i32 [ 0, %call ], [ %inc.i, %L1.i ]
+; RESULT-NEXT: %inc.i = add i32 %i.i, 1
+; RESULT-NEXT: %idxprom.i = zext i32 %i.i to i64
+; RESULT-NEXT: %arrayidx.i = getelementptr inbounds [3 x ptr], ptr @indirectbr.L, i64 0, i64 %idxprom.i
+; RESULT-NEXT: %brtarget.i = load ptr, ptr %arrayidx.i, align 8
+; RESULT-NEXT: indirectbr ptr %brtarget.i, [label %L1.i, label %callee_with_indirectbr.exit]
+
+define void @calls_func_with_indirectbr(i1 %arg0) {
+entry:
+  br i1 %arg0, label %call, label %ret
+
+call:
+  call void @callee_with_indirectbr() ; the expected output above shows the callee's L1 block cloned as L1.i with its indirectbr kept
+  br label %ret
+
+ret:
+  ret void
+}
+
+
+; CHECK-LABEL: define ptr @callee_with_blockaddress_use(
+; RESULT: L2:
+; RESULT-NEXT: store ptr blockaddress(@callee_with_blockaddress_use, %L1), ptr %alloca, align 8
+; RESULT-NEXT: store ptr blockaddress(@callee_with_blockaddress_use, %L2), ptr %alloca, align 8
+; RESULT-NEXT: store ptr blockaddress(@callee_with_blockaddress_use, %L3), ptr %alloca, align 8
+; RESULT-NEXT: %cond1 = load volatile i1, ptr addrspace(1) null
+; RESULT-NEXT: br i1 %cond1, label %L1, label %L3
+define ptr @callee_with_blockaddress_use() { ; callee that stores blockaddress constants of its own blocks L1, L2 and L3
+entry:
+  %alloca = alloca ptr
+  %cond0 = load volatile i1, ptr addrspace(1) null ; volatile load supplies the branch condition
+  br i1 %cond0, label %L1, label %L2
+
+L1:
+  br label %L2
+
+L2:
+  ; reference an earlier block
+  store ptr blockaddress(@callee_with_blockaddress_use, %L1), ptr %alloca
+
+  ; reference the block itself from the block
+  store ptr blockaddress(@callee_with_blockaddress_use, %L2), ptr %alloca
+
+  ; reference a later block
+  store ptr blockaddress(@callee_with_blockaddress_use, %L3), ptr %alloca
+
+  %cond1 = load volatile i1, ptr addrspace(1) null
+  br i1 %cond1, label %L1, label %L3 ; L2 can branch back to L1, forming a cycle
+
+L3:
+  %load = load ptr, ptr %alloca ; returns whichever blockaddress was stored last
+  ret ptr %load
+}
+
+; FIXME: This is not correctly remapping the blockaddress use
+; CHECK-LABEL: define void @calls_func_with_blockaddress_use(
+; RESULT: entry:
+; RESULT-NEXT: %alloca.i = alloca ptr, align 8
+; RESULT-NEXT: store i32 1000, ptr null, align 4
+; RESULT-NEXT: br i1 %arg0, label %call, label %ret
+
+; RESULT: call:
+; RESULT-NEXT: store i32 2000, ptr null, align 4
+; RESULT-NEXT: call void @llvm.lifetime.start.p0(ptr %alloca.i)
+; RESULT-NEXT: %cond0.i = load volatile i1, ptr addrspace(1) null, align 1
+; RESULT-NEXT: br i1 %cond0.i, label %L1.i, label %L2.i
+
+; RESULT: L1.i: ; preds = %L2.i, %call
+; RESULT-NEXT: br label %L2.i
+
+; RESULT: L2.i:                                             ; preds = %L1.i, %call
+; RESULT-NEXT: store ptr blockaddress(@callee_with_blockaddress_use, %L1), ptr %alloca.i, align 8
+; RESULT-NEXT:   store ptr blockaddress(@calls_func_with_blockaddress_use, %L2.i), ptr %alloca.i, align 8
+; RESULT-NEXT: store ptr blockaddress(@callee_with_blockaddress_use, %L3), ptr %alloca.i, align 8
+; RESULT-NEXT: %cond1.i = load volatile i1, ptr addrspace(1) null, align 1
+; RESULT-NEXT: br i1 %cond1.i, label %L1.i, label %callee_with_blockaddress_use.exit
+
+; RESULT: callee_with_blockaddress_use.exit:                ; preds = %L2.i
+; RESULT-NEXT: %load.i = load ptr, ptr %alloca.i, align 8
+; RESULT-NEXT: call void @llvm.lifetime.end.p0(ptr %alloca.i)
+; RESULT-NEXT: store i32 3000, ptr null, align 4
+; RESULT-NEXT: br label %ret
+
+; RESULT: ret: ; preds = %callee_with_blockaddress_use.exit, %entry
+; RESULT-NEXT: store i32 4000, ptr null, align 4
+; RESULT-NEXT: ret void
+define void @calls_func_with_blockaddress_use(i1 %arg0) {
+entry:
+  store i32 1000, ptr null
+  br i1 %arg0, label %call, label %ret
+
+call:
+  store i32 2000, ptr null
+  call ptr @callee_with_blockaddress_use() ; returned pointer is unused; see the FIXME above about how the inlined blockaddress uses are remapped
+  store i32 3000, ptr null ; in the expected output above this store lands in the callee_with_blockaddress_use.exit block
+  br label %ret
+
+ret:
+  store i32 4000, ptr null
+  ret void
+}
+
+; CHECK-LABEL: define void @callee_with_fallthrough_blockaddress_use(
+; RESULT: L2:
+; RESULT-NEXT: store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L1), ptr %alloca, align 8
+; RESULT-NEXT: store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L2), ptr %alloca, align 8
+; RESULT-NEXT: store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L3), ptr %alloca, align 8
+; RESULT-NEXT: br label %L3
+define void @callee_with_fallthrough_blockaddress_use() { ; straight-line variant: entry -> L1 -> L2 -> L3 with no conditional branches
+entry:
+  %alloca = alloca ptr
+  br label %L1
+
+L1:
+  store i32 999, ptr null
+  br label %L2
+
+L2:                                               ; preds = %L1
+  ; reference a block before this block
+  store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L1), ptr %alloca
+
+  ; reference the block itself from the block
+  store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L2), ptr %alloca
+
+  ; reference a block after this block
+  store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L3), ptr %alloca
+  br label %L3
+
+L3:                                               ; preds = %L2
+  %load = load ptr, ptr %alloca
+  ret void
+}
+
+
+; CHECK-LABEL: define void @calls_func_with_fallthrough_blockaddress_use(
+; RESULT: entry:
+; RESULT-NEXT: %alloca.i = alloca ptr, align 8
+; RESULT-NEXT: store i32 1000, ptr null
+; RESULT-NEXT: br i1 %arg0, label %call, label %ret
+
+; RESULT: call:
+; RESULT-NEXT: store i32 2000, ptr null, align 4
+; RESULT-NEXT: call void @llvm.lifetime.start.p0(ptr %alloca.i)
+; RESULT-NEXT: br label %L1.i
+
+; RESULT: L1.i: ; preds = %call
+; RESULT-NEXT: store i32 999, ptr null, align 4
+; RESULT-NEXT: br label %L2.i
+
+; RESULT: L2.i:
+; RESULT-NEXT: store ptr blockaddress(@calls_func_with_fallthrough_blockaddress_use, %L1.i), ptr %alloca.i, align 8
+; RESULT-NEXT: store ptr blockaddress(@calls_func_with_fallthrough_blockaddress_use, %L2.i), ptr %alloca.i, align 8
+; RESULT-NEXT: store ptr blockaddress(@callee_with_fallthrough_blockaddress_use, %L3), ptr %alloca.i, align 8
+; RESULT-NEXT: br label %callee_with_fallthrough_blockaddress_use.exit
+
+; RESULT: callee_with_fallthrough_blockaddress_use.exit:    ; preds = %L2.i
+; RESULT-NEXT: %load.i = load ptr, ptr %alloca.i, align 8
+; RESULT-NEXT: call void @llvm.lifetime.end.p0(ptr %alloca.i)
+; RESULT-NEXT: store i32 3000, ptr null, align 4
+; RESULT-NEXT: br label %ret
+
+; RESULT: ret:
+; RESULT-NEXT: store i32 4000, ptr null, align 4
+; RESULT-NEXT: ret void
+define void @calls_func_with_fallthrough_blockaddress_use(i1 %arg0) {
+entry:
+  store i32 1000, ptr null
+  br i1 %arg0, label %call, label %ret
+
+call:
+  store i32 2000, ptr null
+  call void @callee_with_fallthrough_blockaddress_use() ; the expected output above shows the callee blocks cloned as L1.i and L2.i, with L3 becoming the .exit block
+  store i32 3000, ptr null
+  br label %ret
+
+ret:
+  store i32 4000, ptr null
+  ret void
+}
+
+declare i32 @extern_returns_twice() returns_twice
+
+; CHECK-LABEL: define i32 @callee_returns_twice(
+; RESULT-NEXT: %call = call i32 @extern_returns_twice()
+; RESULT-NEXT: %add = add nsw i32 1, %call
+; RESULT-NEXT: ret i32 %add
+define i32 @callee_returns_twice() { ; not marked returns_twice itself, but calls a declaration that is
+  %call = call i32 @extern_returns_twice()
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+; CHECK-LABEL: define i32 @caller_returns_twice_calls_callee_returns_twice(
+; RESULT-NEXT: %call.i = call i32 @extern_returns_twice()
+; RESULT-NEXT: %add.i = add nsw i32 1, %call.i
+; RESULT-NEXT: %add = add nsw i32 1, %add.i
+; RESULT-NEXT: ret i32 %add
+  define i32 @caller_returns_twice_calls_callee_returns_twice() returns_twice {
+  %call = call i32 @callee_returns_twice() ; the expected output above shows the callee body in place of this call (%call.i, %add.i)
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+; Inliner usually blocks inlining of returns_twice functions into
+; non-returns_twice functions
+; CHECK-LABEL: define i32 @regular_caller_calls_callee_returns_twice() {
+; RESULT-NEXT: %call.i = call i32 @extern_returns_twice()
+; RESULT-NEXT: %add.i = add nsw i32 1, %call.i
+; RESULT-NEXT: %add = add nsw i32 1, %add.i
+; RESULT-NEXT: ret i32 %add
+define i32 @regular_caller_calls_callee_returns_twice() { ; caller without returns_twice
+  %call = call i32 @callee_returns_twice() ; the expected output above still shows the callee body in place of this call
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+; CHECK-LABEL: define void @caller_with_vastart(
+; RESULT-NEXT: %ap = alloca ptr, align 4
+; RESULT-NEXT: %ap2 = alloca ptr, align 4
+; RESULT-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap)
+; RESULT-NEXT: call void @llvm.va_end.p0(ptr nonnull %ap)
+; RESULT-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap)
+; RESULT-NEXT: call void @llvm.va_end.p0(ptr nonnull %ap)
+; RESULT-NEXT: ret void
+define void @caller_with_vastart(ptr noalias nocapture readnone %args, ...) {
+  %ap = alloca ptr, align 4
+  %ap2 = alloca ptr, align 4 ; not referenced again in this function
+  call void @llvm.va_start.p0(ptr nonnull %ap)
+  call fastcc void @callee_with_vaend(ptr nonnull %ap) ; the expected output above shows llvm.va_end on %ap in place of this call
+  call void @llvm.va_start.p0(ptr nonnull %ap)
+  call fastcc void @callee_with_vaend_alwaysinline(ptr nonnull %ap) ; likewise replaced by llvm.va_end on %ap
+  ret void
+}
+
+; CHECK-LABEL: define fastcc void @callee_with_vaend(
+; RESULT-NEXT: tail call void @llvm.va_end.p0(ptr %a)
+; RESULT-NEXT: ret void
+define fastcc void @callee_with_vaend(ptr %a) { ; only ends the va_list passed in through %a
+  tail call void @llvm.va_end.p0(ptr %a)
+  ret void
+}
+
+; CHECK-LABEL: define internal fastcc void @callee_with_vaend_alwaysinline(
+; RESULT-NEXT: tail call void @llvm.va_end.p0(ptr %a)
+; RESULT-NEXT: ret void
+define internal fastcc void @callee_with_vaend_alwaysinline(ptr %a) alwaysinline { ; same body as @callee_with_vaend, but internal and alwaysinline
+  tail call void @llvm.va_end.p0(ptr %a)
+  ret void
+}
+
+; CHECK-LABEL: define i32 @callee_with_va_start(
+define i32 @callee_with_va_start(ptr %a, ...) { ; variadic callee that itself calls llvm.va_start
+  %vargs = alloca ptr, align 8
+  tail call void @llvm.va_start.p0(ptr %a) ; note: starts the list in the %a argument, while va_arg/va_end below use %vargs
+  %va1 = va_arg ptr %vargs, i32
+  call void @llvm.va_end(ptr %vargs) ; written without the .p0 suffix; the expected output for the caller below prints it as llvm.va_end.p0
+  ret i32 %va1
+}
+
+; CHECK-LABEL: define i32 @callee_vastart_caller(
+; RESULT-NEXT: %vargs.i = alloca ptr, align 8
+; RESULT-NEXT: %ap = alloca ptr, align 4
+; RESULT-NEXT: %b = load i32, ptr null, align 4
+; RESULT-NEXT: call void @llvm.lifetime.start.p0(ptr %vargs.i)
+; RESULT-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap)
+; RESULT-NEXT: %va1.i = va_arg ptr %vargs.i, i32
+; RESULT-NEXT: call void @llvm.va_end.p0(ptr %vargs.i)
+; RESULT-NEXT: call void @llvm.lifetime.end.p0(ptr %vargs.i)
+; RESULT-NEXT: ret i32 %va1.i
+define i32 @callee_vastart_caller(ptr noalias nocapture readnone %args, ...) {
+  %ap = alloca ptr, align 4
+  %b = load i32, ptr null
+  %result = call i32 (ptr, ...) @callee_with_va_start(ptr nonnull %ap, i32 %b) ; variadic call passing %b as the extra argument; the expected output above shows the callee body here, with %b otherwise unused
+  ret i32 %result
+}
+
+declare void @llvm.localescape(...)
+
+; CHECK-LABEL: define internal void @callee_uses_localrecover(
+define internal void @callee_uses_localrecover(ptr %fp) {
+  %a.i8 = call ptr @llvm.localrecover(ptr @callee_uses_localescape, ptr %fp, i32 0) ; index 0 of the locals escaped by @callee_uses_localescape, relative to frame pointer %fp
+  store i32 42, ptr %a.i8
+  ret void
+}
+
+; CHECK-LABEL: define i32 @callee_uses_localescape(
+; RESULT-NEXT: %a = alloca i32, align 4
+; RESULT-NEXT: call void (...) @llvm.localescape(ptr %a)
+; RESULT-NEXT: %fp = call ptr @llvm.frameaddress.p0(i32 0)
+; RESULT-NEXT: %a.i8.i = call ptr @llvm.localrecover(ptr @callee_uses_localescape, ptr %fp, i32 0)
+; RESULT-NEXT: store i32 42, ptr %a.i8.i, align 4
+; RESULT-NEXT: %r = load i32, ptr %a, align 4
+; RESULT-NEXT: ret i32 %r
+define i32 @callee_uses_localescape() alwaysinline {
+  %a = alloca i32
+  call void (...) @llvm.localescape(ptr %a) ; registers %a as escaped local index 0
+  %fp = call ptr @llvm.frameaddress(i32 0)
+  tail call void @callee_uses_localrecover(ptr %fp) ; the expected output above shows the localrecover call and store i32 42 in place of this call
+  %r = load i32, ptr %a
+  ret i32 %r
+}
+
+; CHECK-LABEL: define i32 @callee_uses_localescape_caller(
+; RESULT-NEXT: %a.i = alloca i32, align 4
+; RESULT-NEXT: call void @llvm.lifetime.start.p0(ptr %a.i)
+; RESULT-NEXT: call void (...) @llvm.localescape(ptr %a.i)
+; RESULT-NEXT: %fp.i = call ptr @llvm.frameaddress.p0(i32 0)
+; RESULT-NEXT: %a.i8.i.i = call ptr @llvm.localrecover(ptr @callee_uses_localescape, ptr %fp.i, i32 0)
+; RESULT-NEXT: store i32 42, ptr %a.i8.i.i, align 4
+; RESULT-NEXT: %r.i = load i32, ptr %a.i, align 4
+; RESULT-NEXT: call void @llvm.lifetime.end.p0(ptr %a.i)
+; RESULT-NEXT: ret i32 %r.i
+define i32 @callee_uses_localescape_caller() {
+  %r = tail call i32 @callee_uses_localescape() ; the expected output above shows the localescape, frameaddress and localrecover calls all inlined here
+  ret i32 %r
+}
+
+declare void @llvm.icall.branch.funnel(...)
+
+; CHECK-LABEL: define void @callee_uses_branch_funnel(
+; RESULT-NEXT: musttail call void (...) @llvm.icall.branch.funnel(...)
+; RESULT-NEXT: ret void
+define void @callee_uses_branch_funnel(...) { ; forwards its variadic arguments to the branch funnel intrinsic
+  musttail call void (...) @llvm.icall.branch.funnel(...)
+  ret void
+}
+
+; FIXME: This should fail the verifier after inlining
+; CHECK-LABEL: define void @callee_branch_funnel_musttail_caller(
+; RESULT-NEXT: call void (...) @llvm.icall.branch.funnel()
+; RESULT-NEXT: ret void
+define void @callee_branch_funnel_musttail_caller() {
+  call void (...) @callee_uses_branch_funnel() ; the expected output above shows a plain (non-musttail) call to the intrinsic here; see the FIXME above
+  ret void
+}
+
+; Ignore noinline on the callee function
+; CHECK-LABEL: define void @noinline_callee(
+; RESULT-NEXT: store i32 123, ptr %arg, align 4
+; RESULT-NEXT: ret void
+define void @noinline_callee(ptr %arg) noinline { ; carries noinline so the "ignore noinline on the callee" case is actually exercised
+  store i32 123, ptr %arg
+  ret void
+}
+
+; CHECK-LABEL: define void @calls_noinline_func(
+; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4
+; RESULT-NEXT: ret void
+define void @calls_noinline_func(ptr %outer.arg) {
+  call void @noinline_callee(ptr %outer.arg) ; the expected output above shows the callee's store to %outer.arg in place of this call
+  ret void
+}
+
+; Ignore noinline on the callsite
+; CHECK-LABEL: define void @calls_noinline_callsite(
+; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4
+; RESULT-NEXT: ret void
+define void @calls_noinline_callsite(ptr %outer.arg) {
+  call void @simple_callee(ptr %outer.arg) noinline ; noinline is on the call site here; the expected output above still shows the store inlined
+  ret void
+}
+
+; Ignore optnone
+; CHECK-LABEL: define void @optnone_callee(
+; RESULT-NEXT: store i32 5555, ptr %arg, align 4
+; RESULT-NEXT: ret void
+define void @optnone_callee(ptr %arg) optnone noinline { ; callee marked both optnone and noinline
+  store i32 5555, ptr %arg
+  ret void
+}
+
+; CHECK-LABEL: define void @calls_optnone_callee(
+; RESULT-NEXT: store i32 5555, ptr %outer.arg, align 4
+; RESULT-NEXT: ret void
+define void @calls_optnone_callee(ptr %outer.arg) {
+  call void @optnone_callee(ptr %outer.arg) ; the expected output above shows the callee's store i32 5555 in place of this call
+  ret void
+}
+
+; CHECK-LABEL: define void @optnone_caller(
+; RESULT-NEXT: store i32 123, ptr %outer.arg, align 4
+; RESULT-NEXT: ret void
+define void @optnone_caller(ptr %outer.arg) optnone noinline { ; here the caller, not the callee, is optnone
+  call void @simple_callee(ptr %outer.arg) ; the expected output above shows store i32 123 in place of this call
+  ret void
+}
+
+; CHECK-LABEL: define weak void @interposable_callee(
+; RESULT-NEXT: store i32 2024, ptr %arg, align 4
+; RESULT-NEXT: ret void
+define weak void @interposable_callee(ptr %arg) { ; weak linkage makes this definition interposable
+  store i32 2024, ptr %arg
+  ret void
+}
+
+; Ignore interposable linkage
+; CHECK-LABEL: @calls_interposable_callee(
+; RESULT-NEXT: store i32 2024, ptr %arg, align 4
+; RESULT-NEXT: ret void
+define void @calls_interposable_callee(ptr %arg) {
+  call void @interposable_callee(ptr %arg) ; the expected output above shows the weak callee's store i32 2024 in place of this call
+  ret void
+}
+
+; Ignore null_pointer_is_valid
+; CHECK-LABEL: @null_pointer_is_valid_callee(
+; RESULT-NEXT: store i32 42069, ptr %arg, align 4
+; RESULT-NEXT: ret void
+define void @null_pointer_is_valid_callee(ptr %arg) null_pointer_is_valid { ; callee carries null_pointer_is_valid; its caller below does not
+  store i32 42069, ptr %arg
+  ret void
+}
+
+; CHECK-LABEL: @calls_null_pointer_is_valid_callee(
+; RESULT-NEXT: store i32 42069, ptr %arg, align 4
+; RESULT-NEXT: ret void
+define void @calls_null_pointer_is_valid_callee(ptr %arg) { ; caller without null_pointer_is_valid
+  call void @null_pointer_is_valid_callee(ptr %arg) ; the expected output above shows the callee's store i32 42069 in place of this call
+  ret void
+}
+
+; CHECK-LABEL: @byval_arg_uses_non_alloca_addrspace(
+; RESULT-NEXT: %load = load i32, ptr addrspace(1) %arg, align 4
+; RESULT-NEXT: ret i32 %load
+define i32 @byval_arg_uses_non_alloca_addrspace(ptr addrspace(1) byval(i32) %arg) { ; byval argument is a pointer in addrspace(1)
+  %load = load i32, ptr addrspace(1) %arg
+  ret i32 %load
+}
+
+; CHECK-LABEL: @calls_byval_arg_uses_non_alloca_addrspace(
+; RESULT-NEXT: %arg1 = alloca i32, align 4, addrspace(1)
+; RESULT-NEXT: call void @llvm.lifetime.start.p1(ptr addrspace(1) %arg1)
+; RESULT-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %arg1, ptr addrspace(1) %arg, i64 4, i1 false)
+; RESULT-NEXT: %load.i = load i32, ptr addrspace(1) %arg1, align 4
+; RESULT-NEXT: call void @llvm.lifetime.end.p1(ptr addrspace(1) %arg1)
+; RESULT-NEXT: ret i32 %load.i
+define i32 @calls_byval_arg_uses_non_alloca_addrspace(ptr addrspace(1) %arg) {
+  %call = call i32 @byval_arg_uses_non_alloca_addrspace(ptr addrspace(1) byval(i32) %arg) ; the expected output above shows an addrspace(1) alloca plus a 4-byte memcpy materializing the byval copy
+  ret i32 %call
+}
+
+; CHECK-LABEL: define void @callee_stacksize(
+; RESULT-NEXT: %alloca = alloca [4096 x i32]
+; RESULT-NEXT: store i32 12345678, ptr %arg
+; RESULT-NEXT: store i32 0, ptr %alloca
+; RESULT-NEXT: ret void
+define void @callee_stacksize(ptr %arg) "inline-max-stacksize"="4" { ; attribute value 4 is far smaller than the alloca below
+  %alloca = alloca [4096 x i32]
+  store i32 12345678, ptr %arg
+  store i32 0, ptr %alloca
+  ret void
+}
+
+; CHECK-LABEL: define void @caller_stacksize(
+; RESULT-NEXT: %alloca.i = alloca [4096 x i32], align 4
+; RESULT-NEXT: call void @llvm.lifetime.start.p0(ptr %alloca.i)
+; RESULT-NEXT: store i32 12345678, ptr %arg, align 4
+; RESULT-NEXT: store i32 0, ptr %alloca.i, align 4
+; RESULT-NEXT: call void @llvm.lifetime.end.p0(ptr %alloca.i)
+; RESULT-NEXT: ret void
+define void @caller_stacksize(ptr %arg) {
+  call void @callee_stacksize(ptr %arg) ; the expected output above shows the [4096 x i32] alloca and both stores inlined, wrapped in lifetime markers
+  ret void
+}
+
+; CHECK-LABEL: define void @callee_dynamic_alloca(
+; RESULT-NEXT: %alloca = alloca i32, i32 %n, align 4
+; RESULT-NEXT: store i32 12345678, ptr %arg, align 4
+; RESULT-NEXT: store i32 0, ptr %alloca, align 4
+; RESULT-NEXT: ret void
+define void @callee_dynamic_alloca(ptr %arg, i32 %n) "inline-max-stacksize"="4" {
+  %alloca = alloca i32, i32 %n ; dynamically sized: element count comes from the %n argument
+  store i32 12345678, ptr %arg
+  store i32 0, ptr %alloca
+  ret void
+}
+
+; CHECK-LABEL: define void @caller_dynamic_alloca(
+; RESULT-NEXT: %savedstack = call ptr @llvm.stacksave.p0()
+; RESULT-NEXT: %alloca.i = alloca i32, i32 %size, align 4
+; RESULT-NEXT: store i32 12345678, ptr %arg, align 4
+; RESULT-NEXT: store i32 0, ptr %alloca.i, align 4
+; RESULT-NEXT: call void @llvm.stackrestore.p0(ptr %savedstack)
+; RESULT-NEXT: ret void
+define void @caller_dynamic_alloca(ptr %arg, i32 %size) {
+  call void @callee_dynamic_alloca(ptr %arg, i32 %size) ; the expected output above brackets the inlined dynamic alloca with llvm.stacksave / llvm.stackrestore
+  ret void
+}
+
+declare void @extern_noduplicate() noduplicate
+
+; CHECK-LABEL: define void @callee_noduplicate_calls(
+; RESULT-NEXT: call void @extern_noduplicate()
+; RESULT-NEXT: call void @extern_noduplicate()
+; RESULT-NEXT: ret void
+define void @callee_noduplicate_calls() { ; calls the noduplicate declaration @extern_noduplicate twice
+  call void @extern_noduplicate()
+  call void @extern_noduplicate()
+  ret void
+}
+
+; Ignore noduplicate restrictions
+; CHECK-LABEL: define void @caller_noduplicate_calls_callee(
+; RESULT-NEXT: call void @extern_noduplicate()
+; RESULT-NEXT: call void @extern_noduplicate()
+; RESULT-NEXT: call void @extern_noduplicate()
+; RESULT-NEXT: call void @extern_noduplicate()
+; RESULT-NEXT: ret void
+define void @caller_noduplicate_calls_callee() {
+  call void @callee_noduplicate_calls() ; two call sites of the same callee: the expected output above has four @extern_noduplicate calls in total
+  call void @callee_noduplicate_calls()
+  ret void
+}
+
+; CHECK-LABEL: define void @sanitize_address_callee(
+; RESULT-NEXT: store i32 333, ptr %arg
+; RESULT-NEXT: ret void
+define void @sanitize_address_callee(ptr %arg) sanitize_address { ; callee carries sanitize_address; its caller below does not
+  store i32 333, ptr %arg
+  ret void
+}
+
+; CHECK-LABEL: define void @no_sanitize_address_caller(
+; RESULT-NEXT: store i32 333, ptr %arg
+; RESULT-NEXT: ret void
+define void @no_sanitize_address_caller(ptr %arg) { ; caller without sanitize_address
+  call void @sanitize_address_callee(ptr %arg) ; the expected output above shows the callee's store i32 333 in place of this call
+  ret void
+}
+
+; CHECK-LABEL: define float @nonstrictfp_callee(
+; RESULT-NEXT: %add = fadd float %a, %a
+; RESULT-NEXT: ret float %add
+define float @nonstrictfp_callee(float %a) { ; uses a plain fadd and has no strictfp attribute
+  %add = fadd float %a, %a
+  ret float %add
+}
+
+; CHECK-LABEL: define float @strictfp_caller(
+; RESULT-NEXT: call float @llvm.experimental.constrained.fadd.f32(
+; RESULT-NEXT: call float @llvm.experimental.constrained.fadd.f32(
+; RESULT-NEXT: ret float %add
+define float @strictfp_caller(float %a) strictfp {
+  %call = call float @nonstrictfp_callee(float %a) strictfp ; the expected output above has two constrained fadd calls, i.e. the callee's plain fadd appears in constrained form after inlining
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %call, float 2.0, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret float %add
+}
+
+; CHECK-LABEL: define float @strictfp_callee(
+; RESULT-NEXT: call float @llvm.experimental.constrained.fadd.f32(
+; RESULT-NEXT: ret float
+define float @strictfp_callee(float %a) strictfp { ; strictfp callee built on a constrained fadd intrinsic
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret float %add
+}
+
+; FIXME: This should not inline. The inlined case should fail the
+; verifier, but it does not.
+; CHECK-LABEL: define float @nonstrictfp_caller(
+; RESULT-NEXT: call float @llvm.experimental.constrained.fadd.f32(
+; RESULT-NEXT: fadd float
+; RESULT-NEXT: ret float
+define float @nonstrictfp_caller(float %a) { ; caller without strictfp
+  %call = call float @strictfp_callee(float %a) ; the expected output above shows the constrained fadd inlined next to the plain fadd below; see the FIXME above
+  %add1 = fadd float %call, 2.0
+  ret float %add1
+}
+
+define void @caller_also_has_non_callee_use() {
+  call void @simple_callee(ptr @simple_callee) ; @simple_callee appears both as the called function and as the pointer argument
+  ret void
+}